author    bstarynk <bstarynk@138bc75d-0d04-0410-961f-82ee72b054a4>  2008-03-11 15:19:27 +0000
committer bstarynk <bstarynk@138bc75d-0d04-0410-961f-82ee72b054a4>  2008-03-11 15:19:27 +0000
commit    84e39114ef36f4f202028695936e3725b9a08474
tree      1b76344540036a57ec23806ad0e4e22b62c916c3 /gcc/config
parent    f778423557464b07c76cbf36e2032cccb94c02bc
2008-03-11 Basile Starynkevitch <basile@starynkevitch.net>
merged with trunk r133107
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/melt-branch@133113 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc/config')
28 files changed, 1165 insertions, 312 deletions
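Among the new files in this diff, gcc/config/arm/bpabi-v6m.S reimplements the EABI long-long comparison helpers in pure Thumb-1 assembly. For orientation, a C sketch of the contract those routines satisfy follows; the _ref names are editorial stand-ins, and per the ARM run-time ABI only the sign of the result matters:

/* Reference semantics for the __aeabi_lcmp/__aeabi_ulcmp helpers that
   bpabi-v6m.S below implements in Thumb-1 assembly.  These sketches
   are illustrative, not part of the patch.  */
int __aeabi_lcmp_ref (long long a, long long b)
{
  return (a < b) ? -1 : (a > b);	/* negative, zero, or positive */
}

int __aeabi_ulcmp_ref (unsigned long long a, unsigned long long b)
{
  return (a < b) ? -1 : (a > b);
}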
diff --git a/gcc/config/arm/arm-cores.def b/gcc/config/arm/arm-cores.def index 748dcb074d1..cce3195a453 100644 --- a/gcc/config/arm/arm-cores.def +++ b/gcc/config/arm/arm-cores.def @@ -1,5 +1,5 @@ /* ARM CPU Cores - Copyright (C) 2003, 2005, 2006, 2007 Free Software Foundation, Inc. + Copyright (C) 2003, 2005, 2006, 2007, 2008 Free Software Foundation, Inc. Written by CodeSourcery, LLC This file is part of GCC. @@ -118,3 +118,4 @@ ARM_CORE("arm1156t2-s", arm1156t2s, 6T2, FL_LDSCHED, 9e) ARM_CORE("cortex-a8", cortexa8, 7A, FL_LDSCHED, 9e) ARM_CORE("cortex-r4", cortexr4, 7R, FL_LDSCHED, 9e) ARM_CORE("cortex-m3", cortexm3, 7M, FL_LDSCHED, 9e) +ARM_CORE("cortex-m1", cortexm1, 6M, FL_LDSCHED, 9e) diff --git a/gcc/config/arm/arm-tune.md b/gcc/config/arm/arm-tune.md index 5b4c46f07b7..d73382bc920 100644 --- a/gcc/config/arm/arm-tune.md +++ b/gcc/config/arm/arm-tune.md @@ -1,5 +1,5 @@ ;; -*- buffer-read-only: t -*- ;; Generated automatically by gentune.sh from arm-cores.def (define_attr "tune" - "arm2,arm250,arm3,arm6,arm60,arm600,arm610,arm620,arm7,arm7d,arm7di,arm70,arm700,arm700i,arm710,arm720,arm710c,arm7100,arm7500,arm7500fe,arm7m,arm7dm,arm7dmi,arm8,arm810,strongarm,strongarm110,strongarm1100,strongarm1110,arm7tdmi,arm7tdmis,arm710t,arm720t,arm740t,arm9,arm9tdmi,arm920,arm920t,arm922t,arm940t,ep9312,arm10tdmi,arm1020t,arm9e,arm946es,arm966es,arm968es,arm10e,arm1020e,arm1022e,xscale,iwmmxt,arm926ejs,arm1026ejs,arm1136js,arm1136jfs,arm1176jzs,arm1176jzfs,mpcorenovfp,mpcore,arm1156t2s,cortexa8,cortexr4,cortexm3" + "arm2,arm250,arm3,arm6,arm60,arm600,arm610,arm620,arm7,arm7d,arm7di,arm70,arm700,arm700i,arm710,arm720,arm710c,arm7100,arm7500,arm7500fe,arm7m,arm7dm,arm7dmi,arm8,arm810,strongarm,strongarm110,strongarm1100,strongarm1110,arm7tdmi,arm7tdmis,arm710t,arm720t,arm740t,arm9,arm9tdmi,arm920,arm920t,arm922t,arm940t,ep9312,arm10tdmi,arm1020t,arm9e,arm946es,arm966es,arm968es,arm10e,arm1020e,arm1022e,xscale,iwmmxt,arm926ejs,arm1026ejs,arm1136js,arm1136jfs,arm1176jzs,arm1176jzfs,mpcorenovfp,mpcore,arm1156t2s,cortexa8,cortexr4,cortexm3,cortexm1" (const (symbol_ref "arm_tune"))) diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index c66b51ec328..bddb0e2771a 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -1,6 +1,6 @@ /* Output routines for GCC for ARM. Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, - 2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc. + 2002, 2003, 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc. Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl) and Martin Simmons (@harleqn.co.uk). More major hacks by Richard Earnshaw (rearnsha@arm.com). 
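The first arm.c hunks below declare arm_issue_rate and install it as TARGET_SCHED_ISSUE_RATE. As a hedged sketch of the mechanism, assuming only what the hunks show: the backend publishes an issue-rate callback through a target hook vtable, and the generic scheduler treats a missing callback as single issue. Everything here other than the names arm_issue_rate and TARGET_SCHED_ISSUE_RATE is a toy model, not GCC's real plumbing.

#include <stdio.h>

/* Toy model of the target-hook indirection.  */
struct sched_hooks { int (*issue_rate) (void); };

static int model_issue_rate (void)	/* stands in for arm_issue_rate */
{
  return 2;				/* the cortex-a8/cortex-r4 case */
}

int main (void)
{
  struct sched_hooks hooks = { model_issue_rate };
  /* A null hook means single issue, mirroring the scheduler default.  */
  int rate = hooks.issue_rate ? hooks.issue_rate () : 1;
  printf ("insns issued per cycle: %d\n", rate);
  return 0;
}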
@@ -188,6 +188,7 @@ static void arm_target_help (void); static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode); static bool arm_cannot_copy_insn_p (rtx); static bool arm_tls_symbol_p (rtx x); +static int arm_issue_rate (void); static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED; @@ -358,6 +359,9 @@ static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED; #undef TARGET_CANNOT_FORCE_CONST_MEM #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem +#undef TARGET_SCHED_ISSUE_RATE +#define TARGET_SCHED_ISSUE_RATE arm_issue_rate + #undef TARGET_MANGLE_TYPE #define TARGET_MANGLE_TYPE arm_mangle_type @@ -460,6 +464,7 @@ static int thumb_call_reg_needed; #define FL_FOR_ARCH6Z FL_FOR_ARCH6 #define FL_FOR_ARCH6ZK FL_FOR_ARCH6K #define FL_FOR_ARCH6T2 (FL_FOR_ARCH6 | FL_THUMB2) +#define FL_FOR_ARCH6M (FL_FOR_ARCH6 & ~FL_NOTM) #define FL_FOR_ARCH7 (FL_FOR_ARCH6T2 &~ FL_NOTM) #define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM) #define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_DIV) @@ -632,6 +637,7 @@ static const struct processors all_architectures[] = {"armv6z", arm1176jzs, "6Z", FL_CO_PROC | FL_FOR_ARCH6Z, NULL}, {"armv6zk", arm1176jzs, "6ZK", FL_CO_PROC | FL_FOR_ARCH6ZK, NULL}, {"armv6t2", arm1156t2s, "6T2", FL_CO_PROC | FL_FOR_ARCH6T2, NULL}, + {"armv6-m", cortexm1, "6M", FL_FOR_ARCH6M, NULL}, {"armv7", cortexa8, "7", FL_CO_PROC | FL_FOR_ARCH7, NULL}, {"armv7-a", cortexa8, "7A", FL_CO_PROC | FL_FOR_ARCH7A, NULL}, {"armv7-r", cortexr4, "7R", FL_CO_PROC | FL_FOR_ARCH7R, NULL}, @@ -17639,12 +17645,23 @@ arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED, ? 1 : 0); if (mi_delta < 0) mi_delta = - mi_delta; - /* When generating 16-bit thumb code, thunks are entered in arm mode. */ + if (TARGET_THUMB1) { int labelno = thunk_label++; ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno); - fputs ("\tldr\tr12, ", file); + /* Thunks are entered in arm mode when available. */ + if (TARGET_THUMB1_ONLY) + { + /* Push r3 so we can use it as a temporary. */ + /* TODO: Omit this save if r3 is not used. */ + fputs ("\tpush {r3}\n", file); + fputs ("\tldr\tr3, ", file); + } + else + { + fputs ("\tldr\tr12, ", file); + } assemble_name (file, label); fputc ('\n', file); if (flag_pic) @@ -17658,29 +17675,63 @@ arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED, Note that we have "+ 1" because some versions of GNU ld don't set the low bit of the result for R_ARM_REL32 - relocations against thumb function symbols. */ + relocations against thumb function symbols. + On ARMv6M this is +4, not +8. */ ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno); assemble_name (file, labelpc); fputs (":\n", file); - fputs ("\tadd\tr12, pc, r12\n", file); + if (TARGET_THUMB1_ONLY) + { + /* This is 2 insns after the start of the thunk, so we know it + is 4-byte aligned. */ + fputs ("\tadd\tr3, pc, r3\n", file); + fputs ("\tmov r12, r3\n", file); + } + else + fputs ("\tadd\tr12, pc, r12\n", file); } + else if (TARGET_THUMB1_ONLY) + fputs ("\tmov r12, r3\n", file); } - /* TODO: Use movw/movt for large constants when available. 
*/ - while (mi_delta != 0) + if (TARGET_THUMB1_ONLY) { - if ((mi_delta & (3 << shift)) == 0) - shift += 2; - else - { - asm_fprintf (file, "\t%s\t%r, %r, #%d\n", - mi_op, this_regno, this_regno, - mi_delta & (0xff << shift)); - mi_delta &= ~(0xff << shift); - shift += 8; - } + if (mi_delta > 255) + { + fputs ("\tldr\tr3, ", file); + assemble_name (file, label); + fputs ("+4\n", file); + asm_fprintf (file, "\t%s\t%r, %r, r3\n", + mi_op, this_regno, this_regno); + } + else if (mi_delta != 0) + { + asm_fprintf (file, "\t%s\t%r, %r, #%d\n", + mi_op, this_regno, this_regno, + mi_delta); + } + } + else + { + /* TODO: Use movw/movt for large constants when available. */ + while (mi_delta != 0) + { + if ((mi_delta & (3 << shift)) == 0) + shift += 2; + else + { + asm_fprintf (file, "\t%s\t%r, %r, #%d\n", + mi_op, this_regno, this_regno, + mi_delta & (0xff << shift)); + mi_delta &= ~(0xff << shift); + shift += 8; + } + } } if (TARGET_THUMB1) { + if (TARGET_THUMB1_ONLY) + fputs ("\tpop\t{r3}\n", file); + fprintf (file, "\tbx\tr12\n"); ASM_OUTPUT_ALIGN (file, 2); assemble_name (file, label); @@ -17699,6 +17750,9 @@ arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED, else /* Output ".word .LTHUNKn". */ assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1); + + if (TARGET_THUMB1_ONLY && mi_delta > 255) + assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1); } else { @@ -18660,6 +18714,22 @@ thumb2_output_casesi (rtx *operands) } } +/* Most ARM cores are single issue, but some newer ones can dual issue. + The scheduler descriptions rely on this being correct. */ +static int +arm_issue_rate (void) +{ + switch (arm_tune) + { + case cortexr4: + case cortexa8: + return 2; + + default: + return 1; + } +} + /* A table and a function to perform ARM-specific name mangling for NEON vector types in order to conform to the AAPCS (see "Procedure Call Standard for the ARM Architecture", Appendix A). To qualify diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h index fcb90ab3ca6..d93476ba77c 100644 --- a/gcc/config/arm/arm.h +++ b/gcc/config/arm/arm.h @@ -1,6 +1,7 @@ /* Definitions of target machine for GNU compiler, for ARM. Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, - 2001, 2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc. + 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 + Free Software Foundation, Inc. Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl) and Martin Simmons (@harleqn.co.uk). More major hacks by Richard Earnshaw (rearnsha@arm.com) @@ -207,6 +208,8 @@ extern void (*arm_lang_output_object_attributes_hook)(void); #define TARGET_32BIT (TARGET_ARM || arm_arch_thumb2) /* 32-bit Thumb-2 code. */ #define TARGET_THUMB2 (TARGET_THUMB && arm_arch_thumb2) +/* Thumb-1 only. */ +#define TARGET_THUMB1_ONLY (TARGET_THUMB1 && !arm_arch_notm) /* The following two macros concern the ability to execute coprocessor instructions for VFPv3 or NEON. 
TARGET_VFP3 is currently only ever @@ -2397,7 +2400,8 @@ extern int making_const_table; if (TARGET_THUMB) \ { \ if (is_called_in_ARM_mode (DECL) \ - || (TARGET_THUMB1 && current_function_is_thunk)) \ + || (TARGET_THUMB1 && !TARGET_THUMB1_ONLY \ + && current_function_is_thunk)) \ fprintf (STREAM, "\t.code 32\n") ; \ else if (TARGET_THUMB1) \ fprintf (STREAM, "\t.code\t16\n\t.thumb_func\n") ; \ diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md index 765b89bb84c..5b40449cee1 100644 --- a/gcc/config/arm/arm.md +++ b/gcc/config/arm/arm.md @@ -1,6 +1,7 @@ ;;- Machine description for ARM for GNU compiler ;; Copyright 1991, 1993, 1994, 1995, 1996, 1996, 1997, 1998, 1999, 2000, -;; 2001, 2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc. +;; 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 +;; Free Software Foundation, Inc. ;; Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl) ;; and Martin Simmons (@harleqn.co.uk). ;; More major hacks by Richard Earnshaw (rearnsha@arm.com). @@ -93,9 +94,9 @@ (UNSPEC_TLS 20) ; A symbol that has been treated properly for TLS usage. (UNSPEC_PIC_LABEL 21) ; A label used for PIC access that does not appear in the ; instruction stream. - (UNSPEC_STACK_ALIGN 20) ; Doubleword aligned stack pointer. Used to + (UNSPEC_STACK_ALIGN 22) ; Doubleword aligned stack pointer. Used to ; generate correct unwind information. - (UNSPEC_PIC_OFFSET 22) ; A symbolic 12-bit OFFSET that has been treated + (UNSPEC_PIC_OFFSET 23) ; A symbolic 12-bit OFFSET that has been treated ; correctly for PIC usage. ] ) @@ -183,7 +184,7 @@ ;; scheduling information. (define_attr "insn" - "mov,mvn,smulxy,smlaxy,smlalxy,smulwy,smlawx,mul,muls,mla,mlas,umull,umulls,umlal,umlals,smull,smulls,smlal,smlals,smlawy,smuad,smuadx,smlad,smladx,smusd,smusdx,smlsd,smlsdx,smmul,smmulr,smmla,umaal,smlald,smlsld,clz,mrs,msr,xtab,other" + "mov,mvn,smulxy,smlaxy,smlalxy,smulwy,smlawx,mul,muls,mla,mlas,umull,umulls,umlal,umlals,smull,smulls,smlal,smlals,smlawy,smuad,smuadx,smlad,smladx,smusd,smusdx,smlsd,smlsdx,smmul,smmulr,smmla,umaal,smlald,smlsld,clz,mrs,msr,xtab,sdiv,udiv,other" (const_string "other")) ; TYPE attribute is used to detect floating point instructions which, if @@ -332,7 +333,7 @@ (define_attr "generic_sched" "yes,no" (const (if_then_else - (eq_attr "tune" "arm926ejs,arm1020e,arm1026ejs,arm1136js,arm1136jfs,cortexa8") + (eq_attr "tune" "arm926ejs,arm1020e,arm1026ejs,arm1136js,arm1136jfs,cortexa8,cortexr4") (const_string "no") (const_string "yes")))) @@ -349,6 +350,7 @@ (include "arm1026ejs.md") (include "arm1136jfs.md") (include "cortex-a8.md") +(include "cortex-r4.md") ;;--------------------------------------------------------------------------- diff --git a/gcc/config/arm/bpabi-v6m.S b/gcc/config/arm/bpabi-v6m.S new file mode 100644 index 00000000000..fa3b9c41478 --- /dev/null +++ b/gcc/config/arm/bpabi-v6m.S @@ -0,0 +1,280 @@ +/* Miscellaneous BPABI functions. ARMv6M implementation + + Copyright (C) 2006, 2008 Free Software Foundation, Inc. + Contributed by CodeSourcery. + + This file is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the + Free Software Foundation; either version 2, or (at your option) any + later version. 
+ + In addition to the permissions in the GNU General Public License, the + Free Software Foundation gives you unlimited permission to link the + compiled version of this file into combinations with other programs, + and to distribute those combinations without any restriction coming + from the use of this file. (The General Public License restrictions + do apply in other respects; for example, they cover modification of + the file, and distribution when not linked into a combine + executable.) + + This file is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; see the file COPYING. If not, write to + the Free Software Foundation, 51 Franklin Street, Fifth Floor, + Boston, MA 02110-1301, USA. */ + +#ifdef __ARMEB__ +#define xxh r0 +#define xxl r1 +#define yyh r2 +#define yyl r3 +#else +#define xxh r1 +#define xxl r0 +#define yyh r3 +#define yyl r2 +#endif + +#ifdef L_aeabi_lcmp + +FUNC_START aeabi_lcmp + cmp xxh, yyh + beq 1f + bgt 2f + mov r0, #1 + neg r0, r0 + RET +2: + mov r0, #1 + RET +1: + sub r0, xxl, yyl + beq 1f + bhi 2f + mov r0, #1 + neg r0, r0 + RET +2: + mov r0, #1 +1: + RET + FUNC_END aeabi_lcmp + +#endif /* L_aeabi_lcmp */ + +#ifdef L_aeabi_ulcmp + +FUNC_START aeabi_ulcmp + cmp xxh, yyh + bne 1f + sub r0, xxl, yyl + beq 2f +1: + bcs 1f + mov r0, #1 + neg r0, r0 + RET +1: + mov r0, #1 +2: + RET + FUNC_END aeabi_ulcmp + +#endif /* L_aeabi_ulcmp */ + +#ifdef L_aeabi_ldivmod + +FUNC_START aeabi_ldivmod + push {r0, r1} + mov r0, sp + push {r0, lr} + ldr r0, [sp, #8] + bl SYM(__gnu_ldivmod_helper) + ldr r3, [sp, #4] + mov lr, r3 + add sp, sp, #8 + pop {r2, r3} + RET + FUNC_END aeabi_ldivmod + +#endif /* L_aeabi_ldivmod */ + +#ifdef L_aeabi_uldivmod + +FUNC_START aeabi_uldivmod + push {r0, r1} + mov r0, sp + push {r0, lr} + ldr r0, [sp, #8] + bl SYM(__gnu_uldivmod_helper) + ldr r3, [sp, #4] + mov lr, r3 + add sp, sp, #8 + pop {r2, r3} + RET + FUNC_END aeabi_uldivmod + +#endif /* L_aeabi_uldivmod */ + +#ifdef L_arm_addsubsf3 + +FUNC_START aeabi_frsub + + push {r4, lr} + mov r4, #1 + lsl r4, #31 + eor r0, r0, r4 + bl __aeabi_fadd + pop {r4, pc} + + FUNC_END aeabi_frsub + +#endif /* L_arm_addsubsf3 */ + +#ifdef L_arm_cmpsf2 + +FUNC_START aeabi_cfrcmple + + mov ip, r0 + mov r0, r1 + mov r1, ip + b 6f + +FUNC_START aeabi_cfcmpeq +FUNC_ALIAS aeabi_cfcmple aeabi_cfcmpeq + + @ The status-returning routines are required to preserve all + @ registers except ip, lr, and cpsr. +6: push {r0, r1, r2, r3, r4, lr} + bl __lesf2 + @ Set the Z flag correctly, and the C flag unconditionally. + cmp r0, #0 + @ Clear the C flag if the return value was -1, indicating + @ that the first operand was smaller than the second. 
+ bmi 1f + mov r1, #0 + cmn r0, r1 +1: + pop {r0, r1, r2, r3, r4, pc} + + FUNC_END aeabi_cfcmple + FUNC_END aeabi_cfcmpeq + FUNC_END aeabi_cfrcmple + +FUNC_START aeabi_fcmpeq + + push {r4, lr} + bl __eqsf2 + neg r0, r0 + add r0, r0, #1 + pop {r4, pc} + + FUNC_END aeabi_fcmpeq + +.macro COMPARISON cond, helper, mode=sf2 +FUNC_START aeabi_fcmp\cond + + push {r4, lr} + bl __\helper\mode + cmp r0, #0 + b\cond 1f + mov r0, #0 + pop {r4, pc} +1: + mov r0, #1 + pop {r4, pc} + + FUNC_END aeabi_fcmp\cond +.endm + +COMPARISON lt, le +COMPARISON le, le +COMPARISON gt, ge +COMPARISON ge, ge + +#endif /* L_arm_cmpsf2 */ + +#ifdef L_arm_addsubdf3 + +FUNC_START aeabi_drsub + + push {r4, lr} + mov r4, #1 + lsl r4, #31 + eor xxh, xxh, r4 + bl __aeabi_dadd + pop {r4, pc} + + FUNC_END aeabi_drsub + +#endif /* L_arm_addsubdf3 */ + +#ifdef L_arm_cmpdf2 + +FUNC_START aeabi_cdrcmple + + mov ip, r0 + mov r0, r2 + mov r2, ip + mov ip, r1 + mov r1, r3 + mov r3, ip + b 6f + +FUNC_START aeabi_cdcmpeq +FUNC_ALIAS aeabi_cdcmple aeabi_cdcmpeq + + @ The status-returning routines are required to preserve all + @ registers except ip, lr, and cpsr. +6: push {r0, r1, r2, r3, r4, lr} + bl __ledf2 + @ Set the Z flag correctly, and the C flag unconditionally. + cmp r0, #0 + @ Clear the C flag if the return value was -1, indicating + @ that the first operand was smaller than the second. + bmi 1f + mov r1, #0 + cmn r0, r1 +1: + pop {r0, r1, r2, r3, r4, pc} + + FUNC_END aeabi_cdcmple + FUNC_END aeabi_cdcmpeq + FUNC_END aeabi_cdrcmple + +FUNC_START aeabi_dcmpeq + + push {r4, lr} + bl __eqdf2 + neg r0, r0 + add r0, r0, #1 + pop {r4, pc} + + FUNC_END aeabi_dcmpeq + +.macro COMPARISON cond, helper, mode=df2 +FUNC_START aeabi_dcmp\cond + + push {r4, lr} + bl __\helper\mode + cmp r0, #0 + b\cond 1f + mov r0, #0 + pop {r4, pc} +1: + mov r0, #1 + pop {r4, pc} + + FUNC_END aeabi_dcmp\cond +.endm + +COMPARISON lt, le +COMPARISON le, le +COMPARISON gt, ge +COMPARISON ge, ge + +#endif /* L_arm_cmpdf2 */ diff --git a/gcc/config/arm/bpabi.h b/gcc/config/arm/bpabi.h index a67f6498cdd..0f3b24faaf3 100644 --- a/gcc/config/arm/bpabi.h +++ b/gcc/config/arm/bpabi.h @@ -1,5 +1,5 @@ /* Configuration file for ARM BPABI targets. - Copyright (C) 2004, 2005, 2007 + Copyright (C) 2004, 2005, 2007, 2008 Free Software Foundation, Inc. Contributed by CodeSourcery, LLC @@ -99,6 +99,21 @@ #define DECLARE_LIBRARY_RENAMES RENAME_LIBRARY (floatdisf, l2f) #endif +/* These renames are needed on ARMv6M. Other targets get them from + assembly routines. */ +#ifdef L_fixunsdfsi +#define DECLARE_LIBRARY_RENAMES RENAME_LIBRARY (fixunsdfsi, d2uiz) +#endif +#ifdef L_fixunssfsi +#define DECLARE_LIBRARY_RENAMES RENAME_LIBRARY (fixunssfsi, f2uiz) +#endif +#ifdef L_floatundidf +#define DECLARE_LIBRARY_RENAMES RENAME_LIBRARY (floatundidf, ul2d) +#endif +#ifdef L_floatundisf +#define DECLARE_LIBRARY_RENAMES RENAME_LIBRARY (floatundisf, ul2f) +#endif + /* The BPABI requires that we always use an out-of-line implementation of RTTI comparison, even if the target supports weak symbols, because the same object file might be used on a target that does diff --git a/gcc/config/arm/cortex-r4.md b/gcc/config/arm/cortex-r4.md new file mode 100644 index 00000000000..d912f205994 --- /dev/null +++ b/gcc/config/arm/cortex-r4.md @@ -0,0 +1,288 @@ +;; ARM Cortex-R4 scheduling description. +;; Copyright (C) 2007, 2008 Free Software Foundation, Inc. +;; Contributed by CodeSourcery. + +;; This file is part of GCC. 
+ +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING. If not, write to +;; the Free Software Foundation, 51 Franklin Street, Fifth Floor, +;; Boston, MA 02110-1301, USA. + +(define_automaton "cortex_r4") + +;; We approximate the dual-issue constraints of this core using four +;; "issue units" and a reservation matrix as follows. The numbers indicate +;; the instruction groups' preferences in order. Multiple entries for +;; the same numbered preference indicate units that must be reserved +;; together. +;; +;; Issue unit: A B C ALU +;; +;; ALU w/o reg shift 1st 2nd 1st and 2nd +;; ALU w/ reg shift 1st 2nd 2nd 1st and 2nd +;; Moves 1st 2nd 2nd +;; Multiplication 1st 1st +;; Division 1st 1st +;; Load/store single 1st 1st +;; Other load/store 1st 1st +;; Branches 1st + +(define_cpu_unit "cortex_r4_issue_a" "cortex_r4") +(define_cpu_unit "cortex_r4_issue_b" "cortex_r4") +(define_cpu_unit "cortex_r4_issue_c" "cortex_r4") +(define_cpu_unit "cortex_r4_issue_alu" "cortex_r4") + +(define_reservation "cortex_r4_alu" + "(cortex_r4_issue_a+cortex_r4_issue_alu)|\ + (cortex_r4_issue_b+cortex_r4_issue_alu)") +(define_reservation "cortex_r4_alu_shift_reg" + "(cortex_r4_issue_a+cortex_r4_issue_alu)|\ + (cortex_r4_issue_b+cortex_r4_issue_c+\ + cortex_r4_issue_alu)") +(define_reservation "cortex_r4_mov" + "cortex_r4_issue_a|(cortex_r4_issue_b+\ + cortex_r4_issue_alu)") +(define_reservation "cortex_r4_mul" "cortex_r4_issue_a+cortex_r4_issue_alu") +(define_reservation "cortex_r4_mul_2" + "(cortex_r4_issue_a+cortex_r4_issue_alu)*2") +;; Division instructions execute out-of-order with respect to the +;; rest of the pipeline and only require reservations on their first and +;; final cycles. +(define_reservation "cortex_r4_div_9" + "cortex_r4_issue_a+cortex_r4_issue_alu,\ + nothing*7,\ + cortex_r4_issue_a+cortex_r4_issue_alu") +(define_reservation "cortex_r4_div_10" + "cortex_r4_issue_a+cortex_r4_issue_alu,\ + nothing*8,\ + cortex_r4_issue_a+cortex_r4_issue_alu") +(define_reservation "cortex_r4_load_store" + "cortex_r4_issue_a+cortex_r4_issue_c") +(define_reservation "cortex_r4_load_store_2" + "(cortex_r4_issue_a+cortex_r4_issue_b)*2") +(define_reservation "cortex_r4_branch" "cortex_r4_issue_b") + +;; We assume that all instructions are unconditional. + +;; Data processing instructions. Moves without shifts are kept separate +;; for the purposes of the dual-issue constraints above. +(define_insn_reservation "cortex_r4_alu" 2 + (and (eq_attr "tune" "cortexr4") + (and (eq_attr "type" "alu") + (not (eq_attr "insn" "mov")))) + "cortex_r4_alu") + +(define_insn_reservation "cortex_r4_mov" 2 + (and (eq_attr "tune" "cortexr4") + (and (eq_attr "type" "alu") + (eq_attr "insn" "mov"))) + "cortex_r4_mov") + +(define_insn_reservation "cortex_r4_alu_shift" 2 + (and (eq_attr "tune" "cortexr4") + (eq_attr "type" "alu_shift")) + "cortex_r4_alu") + +(define_insn_reservation "cortex_r4_alu_shift_reg" 2 + (and (eq_attr "tune" "cortexr4") + (eq_attr "type" "alu_shift_reg")) + "cortex_r4_alu_shift_reg") + +;; An ALU instruction followed by an ALU instruction with no early dep. 
+(define_bypass 1 "cortex_r4_alu,cortex_r4_alu_shift,cortex_r4_alu_shift_reg,\ + cortex_r4_mov" + "cortex_r4_alu") +(define_bypass 1 "cortex_r4_alu,cortex_r4_alu_shift,cortex_r4_alu_shift_reg,\ + cortex_r4_mov" + "cortex_r4_alu_shift" + "arm_no_early_alu_shift_dep") +(define_bypass 1 "cortex_r4_alu,cortex_r4_alu_shift,cortex_r4_alu_shift_reg,\ + cortex_r4_mov" + "cortex_r4_alu_shift_reg" + "arm_no_early_alu_shift_value_dep") + +;; In terms of availabilities, a consumer mov could theoretically be +;; issued together with a producer ALU instruction, without stalls. +;; In practice this cannot happen because mov;add (in that order) is not +;; eligible for dual issue and furthermore dual issue is not permitted +;; when a dependency is involved. We therefore note it as latency one. +;; A mov followed by another of the same is also latency one. +(define_bypass 1 "cortex_r4_alu,cortex_r4_alu_shift,cortex_r4_alu_shift_reg,\ + cortex_r4_mov" + "cortex_r4_mov") + +;; qadd, qdadd, qsub and qdsub are not currently emitted, and neither are +;; media data processing instructions nor sad instructions. + +;; Multiplication instructions. + +(define_insn_reservation "cortex_r4_mul_4" 4 + (and (eq_attr "tune" "cortexr4") + (eq_attr "insn" "mul,smmul")) + "cortex_r4_mul_2") + +(define_insn_reservation "cortex_r4_mul_3" 3 + (and (eq_attr "tune" "cortexr4") + (eq_attr "insn" "smulxy,smulwy,smuad,smusd")) + "cortex_r4_mul") + +(define_insn_reservation "cortex_r4_mla_4" 4 + (and (eq_attr "tune" "cortexr4") + (eq_attr "insn" "mla,smmla")) + "cortex_r4_mul_2") + +(define_insn_reservation "cortex_r4_mla_3" 3 + (and (eq_attr "tune" "cortexr4") + (eq_attr "insn" "smlaxy,smlawy,smlad,smlsd")) + "cortex_r4_mul") + +(define_insn_reservation "cortex_r4_smlald" 3 + (and (eq_attr "tune" "cortexr4") + (eq_attr "insn" "smlald,smlsld")) + "cortex_r4_mul") + +(define_insn_reservation "cortex_r4_mull" 4 + (and (eq_attr "tune" "cortexr4") + (eq_attr "insn" "smull,umull,umlal,umaal")) + "cortex_r4_mul_2") + +;; A multiply or an MLA with a single-register result, followed by an +;; MLA with an accumulator dependency, has its result forwarded. +(define_bypass 2 "cortex_r4_mul_3,cortex_r4_mla_3" + "cortex_r4_mla_3,cortex_r4_mla_4" + "arm_mac_accumulator_is_mul_result") + +(define_bypass 3 "cortex_r4_mul_4,cortex_r4_mla_4" + "cortex_r4_mla_3,cortex_r4_mla_4" + "arm_mac_accumulator_is_mul_result") + +;; A multiply followed by an ALU instruction needing the multiply +;; result only at ALU has lower latency than one needing it at Shift. +(define_bypass 2 "cortex_r4_mul_3,cortex_r4_mla_3,cortex_r4_smlald" + "cortex_r4_alu") +(define_bypass 2 "cortex_r4_mul_3,cortex_r4_mla_3,cortex_r4_smlald" + "cortex_r4_alu_shift" + "arm_no_early_alu_shift_dep") +(define_bypass 2 "cortex_r4_mul_3,cortex_r4_mla_3,cortex_r4_smlald" + "cortex_r4_alu_shift_reg" + "arm_no_early_alu_shift_value_dep") +(define_bypass 3 "cortex_r4_mul_4,cortex_r4_mla_4,cortex_r4_mull" + "cortex_r4_alu") +(define_bypass 3 "cortex_r4_mul_4,cortex_r4_mla_4,cortex_r4_mull" + "cortex_r4_alu_shift" + "arm_no_early_alu_shift_dep") +(define_bypass 3 "cortex_r4_mul_4,cortex_r4_mla_4,cortex_r4_mull" + "cortex_r4_alu_shift_reg" + "arm_no_early_alu_shift_value_dep") + +;; A multiply followed by a mov has one cycle lower latency again. 
+(define_bypass 1 "cortex_r4_mul_3,cortex_r4_mla_3,cortex_r4_smlald" + "cortex_r4_mov") +(define_bypass 2 "cortex_r4_mul_4,cortex_r4_mla_4,cortex_r4_mull" + "cortex_r4_mov") + +;; We guess that division of A/B using sdiv or udiv, on average, +;; is performed with B having ten more leading zeros than A. +;; This gives a latency of nine for udiv and ten for sdiv. +(define_insn_reservation "cortex_r4_udiv" 9 + (and (eq_attr "tune" "cortexr4") + (eq_attr "insn" "udiv")) + "cortex_r4_div_9") + +(define_insn_reservation "cortex_r4_sdiv" 10 + (and (eq_attr "tune" "cortexr4") + (eq_attr "insn" "sdiv")) + "cortex_r4_div_10") + +;; Branches. We assume correct prediction. + +(define_insn_reservation "cortex_r4_branch" 0 + (and (eq_attr "tune" "cortexr4") + (eq_attr "type" "branch")) + "cortex_r4_branch") + +;; Call latencies are not predictable. A semi-arbitrary very large +;; number is used as "positive infinity" so that everything should be +;; finished by the time of return. +(define_insn_reservation "cortex_r4_call" 32 + (and (eq_attr "tune" "cortexr4") + (eq_attr "type" "call")) + "nothing") + +;; Status register access instructions are not currently emitted. + +;; Load instructions. +;; We do not model the "addr_md_3cycle" cases and assume that +;; accesses following are correctly aligned. + +(define_insn_reservation "cortex_r4_load_1_2" 3 + (and (eq_attr "tune" "cortexr4") + (eq_attr "type" "load1,load2")) + "cortex_r4_load_store") + +(define_insn_reservation "cortex_r4_load_3_4" 4 + (and (eq_attr "tune" "cortexr4") + (eq_attr "type" "load3,load4")) + "cortex_r4_load_store_2") + +;; If a producing load is followed by an instruction consuming only +;; as a Normal Reg, there is one fewer cycle of latency. + +(define_bypass 2 "cortex_r4_load_1_2" + "cortex_r4_alu") +(define_bypass 2 "cortex_r4_load_1_2" + "cortex_r4_alu_shift" + "arm_no_early_alu_shift_dep") +(define_bypass 2 "cortex_r4_load_1_2" + "cortex_r4_alu_shift_reg" + "arm_no_early_alu_shift_value_dep") + +(define_bypass 3 "cortex_r4_load_3_4" + "cortex_r4_alu") +(define_bypass 3 "cortex_r4_load_3_4" + "cortex_r4_alu_shift" + "arm_no_early_alu_shift_dep") +(define_bypass 3 "cortex_r4_load_3_4" + "cortex_r4_alu_shift_reg" + "arm_no_early_alu_shift_value_dep") + +;; If a producing load is followed by an instruction consuming only +;; as a Late Reg, there are two fewer cycles of latency. Such consumer +;; instructions are moves and stores. + +(define_bypass 1 "cortex_r4_load_1_2" + "cortex_r4_mov,cortex_r4_store_1_2,cortex_r4_store_3_4") +(define_bypass 2 "cortex_r4_load_3_4" + "cortex_r4_mov,cortex_r4_store_1_2,cortex_r4_store_3_4") + +;; If a producer's result is required as the base or offset of a load, +;; there is an extra cycle latency. + +(define_bypass 3 "cortex_r4_alu,cortex_r4_mov,cortex_r4_alu_shift,\ + cortex_r4_alu_shift_reg" + "cortex_r4_load_1_2,cortex_r4_load_3_4") + +(define_bypass 4 "cortex_r4_mul_3,cortex_r4_mla_3,cortex_r4_smlald" + "cortex_r4_load_1_2,cortex_r4_load_3_4") + +(define_bypass 5 "cortex_r4_mul_4,cortex_r4_mla_4,cortex_r4_mull" + "cortex_r4_load_1_2,cortex_r4_load_3_4") + +;; Store instructions. 
+ +(define_insn_reservation "cortex_r4_store_1_2" 0 + (and (eq_attr "tune" "cortexr4") + (eq_attr "type" "store1,store2")) + "cortex_r4_load_store") + +(define_insn_reservation "cortex_r4_store_3_4" 0 + (and (eq_attr "tune" "cortexr4") + (eq_attr "type" "store3,store4")) + "cortex_r4_load_store_2") + diff --git a/gcc/config/arm/elf.h b/gcc/config/arm/elf.h index 65bd00fdc3c..37c366d5ac8 100644 --- a/gcc/config/arm/elf.h +++ b/gcc/config/arm/elf.h @@ -1,7 +1,7 @@ /* Definitions of target machine for GNU compiler. For ARM with ELF obj format. - Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2004, 2005, 2007 - Free Software Foundation, Inc. + Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2004, 2005, 2007, + 2008 Free Software Foundation, Inc. Contributed by Philip Blundell <philb@gnu.org> and Catherine Moore <clm@cygnus.com> @@ -145,3 +145,17 @@ } \ while (0) +/* Horrible hack: We want to prevent some libgcc routines being included + for some multilibs. */ +#ifndef __ARM_ARCH_6M__ +#undef L_fixdfsi +#undef L_fixunsdfsi +#undef L_truncdfsf2 +#undef L_fixsfsi +#undef L_fixunssfsi +#undef L_floatdidf +#undef L_floatdisf +#undef L_floatundidf +#undef L_floatundisf +#endif + diff --git a/gcc/config/arm/ieee754-df.S b/gcc/config/arm/ieee754-df.S index ebf7e58dabb..a175fa6bb74 100644 --- a/gcc/config/arm/ieee754-df.S +++ b/gcc/config/arm/ieee754-df.S @@ -1,6 +1,6 @@ /* ieee754-df.S double-precision floating point support for ARM - Copyright (C) 2003, 2004, 2005, 2007 Free Software Foundation, Inc. + Copyright (C) 2003, 2004, 2005, 2007, 2008 Free Software Foundation, Inc. Contributed by Nicolas Pitre (nico@cam.org) This file is free software; you can redistribute it and/or modify it @@ -56,7 +56,7 @@ #endif -#ifdef L_negdf2 +#ifdef L_arm_negdf2 ARM_FUNC_START negdf2 ARM_FUNC_ALIAS aeabi_dneg negdf2 @@ -70,7 +70,7 @@ ARM_FUNC_ALIAS aeabi_dneg negdf2 #endif -#ifdef L_addsubdf3 +#ifdef L_arm_addsubdf3 ARM_FUNC_START aeabi_drsub @@ -603,7 +603,7 @@ LSYM(f0_ret): #endif /* L_addsubdf3 */ -#ifdef L_muldivdf3 +#ifdef L_arm_muldivdf3 ARM_FUNC_START muldf3 ARM_FUNC_ALIAS aeabi_dmul muldf3 @@ -1103,7 +1103,7 @@ LSYM(Ldv_s): #endif /* L_muldivdf3 */ -#ifdef L_cmpdf2 +#ifdef L_arm_cmpdf2 @ Note: only r0 (return value) and ip are clobbered here. @@ -1271,7 +1271,7 @@ ARM_FUNC_START aeabi_dcmpgt #endif /* L_cmpdf2 */ -#ifdef L_unorddf2 +#ifdef L_arm_unorddf2 ARM_FUNC_START unorddf2 ARM_FUNC_ALIAS aeabi_dcmpun unorddf2 @@ -1297,7 +1297,7 @@ ARM_FUNC_ALIAS aeabi_dcmpun unorddf2 #endif /* L_unorddf2 */ -#ifdef L_fixdfsi +#ifdef L_arm_fixdfsi ARM_FUNC_START fixdfsi ARM_FUNC_ALIAS aeabi_d2iz fixdfsi @@ -1339,7 +1339,7 @@ ARM_FUNC_ALIAS aeabi_d2iz fixdfsi #endif /* L_fixdfsi */ -#ifdef L_fixunsdfsi +#ifdef L_arm_fixunsdfsi ARM_FUNC_START fixunsdfsi ARM_FUNC_ALIAS aeabi_d2uiz fixunsdfsi @@ -1377,7 +1377,7 @@ ARM_FUNC_ALIAS aeabi_d2uiz fixunsdfsi #endif /* L_fixunsdfsi */ -#ifdef L_truncdfsf2 +#ifdef L_arm_truncdfsf2 ARM_FUNC_START truncdfsf2 ARM_FUNC_ALIAS aeabi_d2f truncdfsf2 diff --git a/gcc/config/arm/ieee754-sf.S b/gcc/config/arm/ieee754-sf.S index 405487a3855..2da156cd44d 100644 --- a/gcc/config/arm/ieee754-sf.S +++ b/gcc/config/arm/ieee754-sf.S @@ -1,6 +1,6 @@ /* ieee754-sf.S single-precision floating point support for ARM - Copyright (C) 2003, 2004, 2005, 2007 Free Software Foundation, Inc. + Copyright (C) 2003, 2004, 2005, 2007, 2008 Free Software Foundation, Inc. 
Contributed by Nicolas Pitre (nico@cam.org) This file is free software; you can redistribute it and/or modify it @@ -38,7 +38,7 @@ * if necessary without impacting performances. */ -#ifdef L_negsf2 +#ifdef L_arm_negsf2 ARM_FUNC_START negsf2 ARM_FUNC_ALIAS aeabi_fneg negsf2 @@ -51,7 +51,7 @@ ARM_FUNC_ALIAS aeabi_fneg negsf2 #endif -#ifdef L_addsubsf3 +#ifdef L_arm_addsubsf3 ARM_FUNC_START aeabi_frsub @@ -448,7 +448,7 @@ LSYM(f0_ret): #endif /* L_addsubsf3 */ -#ifdef L_muldivsf3 +#ifdef L_arm_muldivsf3 ARM_FUNC_START mulsf3 ARM_FUNC_ALIAS aeabi_fmul mulsf3 @@ -795,7 +795,7 @@ LSYM(Ldv_s): #endif /* L_muldivsf3 */ -#ifdef L_cmpsf2 +#ifdef L_arm_cmpsf2 @ The return value in r0 is @ @@ -958,7 +958,7 @@ ARM_FUNC_START aeabi_fcmpgt #endif /* L_cmpsf2 */ -#ifdef L_unordsf2 +#ifdef L_arm_unordsf2 ARM_FUNC_START unordsf2 ARM_FUNC_ALIAS aeabi_fcmpun unordsf2 @@ -983,7 +983,7 @@ ARM_FUNC_ALIAS aeabi_fcmpun unordsf2 #endif /* L_unordsf2 */ -#ifdef L_fixsfsi +#ifdef L_arm_fixsfsi ARM_FUNC_START fixsfsi ARM_FUNC_ALIAS aeabi_f2iz fixsfsi @@ -1025,7 +1025,7 @@ ARM_FUNC_ALIAS aeabi_f2iz fixsfsi #endif /* L_fixsfsi */ -#ifdef L_fixunssfsi +#ifdef L_arm_fixunssfsi ARM_FUNC_START fixunssfsi ARM_FUNC_ALIAS aeabi_f2uiz fixunssfsi diff --git a/gcc/config/arm/lib1funcs.asm b/gcc/config/arm/lib1funcs.asm index 0c6e440e598..2fc66be80d5 100644 --- a/gcc/config/arm/lib1funcs.asm +++ b/gcc/config/arm/lib1funcs.asm @@ -1,7 +1,7 @@ @ libgcc routines for ARM cpu. @ Division routines, written by Richard Earnshaw, (rearnsha@armltd.co.uk) -/* Copyright 1995, 1996, 1998, 1999, 2000, 2003, 2004, 2005, 2007 +/* Copyright 1995, 1996, 1998, 1999, 2000, 2003, 2004, 2005, 2007, 2008 Free Software Foundation, Inc. This file is free software; you can redistribute it and/or modify it @@ -94,7 +94,8 @@ Boston, MA 02110-1301, USA. */ #if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \ || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) \ - || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) + || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) \ + || defined(__ARM_ARCH_6M__) # define __ARM_ARCH__ 6 #endif @@ -367,6 +368,9 @@ _L__\name: #else /* !(__INTERWORKING_STUBS__ || __thumb2__) */ +#ifdef __ARM_ARCH_6M__ +#define EQUIV .thumb_set +#else .macro ARM_FUNC_START name .text .globl SYM (__\name) @@ -379,6 +383,7 @@ SYM (__\name): .macro ARM_CALL name bl __\name .endm +#endif #endif @@ -391,6 +396,7 @@ SYM (__\name): #endif .endm +#ifndef __ARM_ARCH_6M__ .macro ARM_FUNC_ALIAS new old .globl SYM (__\new) EQUIV SYM (__\new), SYM (__\old) @@ -398,6 +404,7 @@ SYM (__\name): .set SYM (_L__\new), SYM (_L__\old) #endif .endm +#endif #ifdef __thumb__ /* Register aliases. */ @@ -1256,8 +1263,8 @@ LSYM(Lover12): #endif /* L_call_via_rX */ /* Don't bother with the old interworking routines for Thumb-2. */ -/* ??? Maybe only omit these on v7m. */ -#ifndef __thumb2__ +/* ??? Maybe only omit these on "m" variants. */ +#if !defined(__thumb2__) && !defined(__ARM_ARCH_6M__) #if defined L_interwork_call_via_rX @@ -1387,7 +1394,11 @@ LSYM(Lchange_\register): #endif /* Arch supports thumb. 
*/ #ifndef __symbian__ +#ifndef __ARM_ARCH_6M__ #include "ieee754-df.S" #include "ieee754-sf.S" #include "bpabi.S" -#endif /* __symbian__ */ +#else /* __ARM_ARCH_6M__ */ +#include "bpabi-v6m.S" +#endif /* __ARM_ARCH_6M__ */ +#endif /* !__symbian__ */ diff --git a/gcc/config/arm/libunwind.S b/gcc/config/arm/libunwind.S index 0732e9f0af9..dd958548387 100644 --- a/gcc/config/arm/libunwind.S +++ b/gcc/config/arm/libunwind.S @@ -1,5 +1,5 @@ /* Support functions for the unwinder. - Copyright (C) 2003, 2004, 2005, 2007 Free Software Foundation, Inc. + Copyright (C) 2003, 2004, 2005, 2007, 2008 Free Software Foundation, Inc. Contributed by Paul Brook This file is free software; you can redistribute it and/or modify it @@ -53,6 +53,119 @@ #endif #endif +#ifdef __ARM_ARCH_6M__ + +/* r0 points to a 16-word block. Upload these values to the actual core + state. */ +FUNC_START restore_core_regs + mov r1, r0 + add r1, r1, #52 + ldmia r1!, {r3, r4, r5} + sub r3, r3, #4 + mov ip, r3 + str r5, [r3] + mov lr, r4 + /* Restore r8-r11. */ + mov r1, r0 + add r1, r1, #32 + ldmia r1!, {r2, r3, r4, r5} + mov r8, r2 + mov r9, r3 + mov sl, r4 + mov fp, r5 + mov r1, r0 + add r1, r1, #8 + ldmia r1!, {r2, r3, r4, r5, r6, r7} + ldr r1, [r0, #4] + ldr r0, [r0] + mov sp, ip + pop {pc} + FUNC_END restore_core_regs + UNPREFIX restore_core_regs + +/* ARMv6M does not have coprocessors, so these should never be used. */ +FUNC_START gnu_Unwind_Restore_VFP + RET + +/* Store VFP registers d0-d15 to the address in r0. */ +FUNC_START gnu_Unwind_Save_VFP + RET + +/* Load VFP registers d0-d15 from the address in r0. + Use this to load from FSTMD format. */ +FUNC_START gnu_Unwind_Restore_VFP_D + RET + +/* Store VFP registers d0-d15 to the address in r0. + Use this to store in FLDMD format. */ +FUNC_START gnu_Unwind_Save_VFP_D + RET + +/* Load VFP registers d16-d31 from the address in r0. + Use this to load from FSTMD (=VSTM) format. Needs VFPv3. */ +FUNC_START gnu_Unwind_Restore_VFP_D_16_to_31 + RET + +/* Store VFP registers d16-d31 to the address in r0. + Use this to store in FLDMD (=VLDM) format. Needs VFPv3. */ +FUNC_START gnu_Unwind_Save_VFP_D_16_to_31 + RET + +FUNC_START gnu_Unwind_Restore_WMMXD + RET + +FUNC_START gnu_Unwind_Save_WMMXD + RET + +FUNC_START gnu_Unwind_Restore_WMMXC + RET + +FUNC_START gnu_Unwind_Save_WMMXC + RET + +.macro UNWIND_WRAPPER name nargs + FUNC_START \name + /* Create a phase2_vrs structure. */ + /* Save r0 in the PC slot so we can use it as a scratch register. */ + push {r0} + add r0, sp, #4 + push {r0, lr} /* Push original SP and LR. */ + /* Make space for r8-r12. */ + sub sp, sp, #20 + /* Save low registers. */ + push {r0, r1, r2, r3, r4, r5, r6, r7} + /* Save high registers. */ + add r0, sp, #32 + mov r1, r8 + mov r2, r9 + mov r3, sl + mov r4, fp + mov r5, ip + stmia r0!, {r1, r2, r3, r4, r5} + /* Restore original low register values. */ + add r0, sp, #4 + ldmia r0!, {r1, r2, r3, r4, r5} + /* Restore original r0. */ + ldr r0, [sp, #60] + str r0, [sp] + /* Demand-save flags, plus an extra word for alignment. */ + mov r3, #0 + push {r2, r3} + /* Point r1 at the block. Pass r[0..nargs) unchanged. */ + add r\nargs, sp, #4 + + bl SYM (__gnu\name) + + ldr r3, [sp, #64] + add sp, sp, #72 + bx r3 + + FUNC_END \name + UNPREFIX \name +.endm + +#else /* !__ARM_ARCH_6M__ */ + /* r0 points to a 16-word block. Upload these values to the actual core state. 
*/ ARM_FUNC_START restore_core_regs @@ -233,6 +346,8 @@ ARM_FUNC_START gnu_Unwind_Save_WMMXC UNPREFIX \name .endm +#endif /* !__ARM_ARCH_6M__ */ + UNWIND_WRAPPER _Unwind_RaiseException 1 UNWIND_WRAPPER _Unwind_Resume 1 UNWIND_WRAPPER _Unwind_Resume_or_Rethrow 1 diff --git a/gcc/config/arm/sfp-machine.h b/gcc/config/arm/sfp-machine.h new file mode 100644 index 00000000000..bc75737d73d --- /dev/null +++ b/gcc/config/arm/sfp-machine.h @@ -0,0 +1,96 @@ +#define _FP_W_TYPE_SIZE 32 +#define _FP_W_TYPE unsigned long +#define _FP_WS_TYPE signed long +#define _FP_I_TYPE long + +#define _FP_MUL_MEAT_S(R,X,Y) \ + _FP_MUL_MEAT_1_wide(_FP_WFRACBITS_S,R,X,Y,umul_ppmm) +#define _FP_MUL_MEAT_D(R,X,Y) \ + _FP_MUL_MEAT_2_wide(_FP_WFRACBITS_D,R,X,Y,umul_ppmm) +#define _FP_MUL_MEAT_Q(R,X,Y) \ + _FP_MUL_MEAT_4_wide(_FP_WFRACBITS_Q,R,X,Y,umul_ppmm) + +#define _FP_DIV_MEAT_S(R,X,Y) _FP_DIV_MEAT_1_loop(S,R,X,Y) +#define _FP_DIV_MEAT_D(R,X,Y) _FP_DIV_MEAT_2_udiv(D,R,X,Y) +#define _FP_DIV_MEAT_Q(R,X,Y) _FP_DIV_MEAT_4_udiv(Q,R,X,Y) + +#define _FP_NANFRAC_S ((_FP_QNANBIT_S << 1) - 1) +#define _FP_NANFRAC_D ((_FP_QNANBIT_D << 1) - 1), -1 +#define _FP_NANFRAC_Q ((_FP_QNANBIT_Q << 1) - 1), -1, -1, -1 +#define _FP_NANSIGN_S 0 +#define _FP_NANSIGN_D 0 +#define _FP_NANSIGN_Q 0 + +#define _FP_KEEPNANFRACP 1 + +/* Someone please check this. */ +#define _FP_CHOOSENAN(fs, wc, R, X, Y, OP) \ + do { \ + if ((_FP_FRAC_HIGH_RAW_##fs(X) & _FP_QNANBIT_##fs) \ + && !(_FP_FRAC_HIGH_RAW_##fs(Y) & _FP_QNANBIT_##fs)) \ + { \ + R##_s = Y##_s; \ + _FP_FRAC_COPY_##wc(R,Y); \ + } \ + else \ + { \ + R##_s = X##_s; \ + _FP_FRAC_COPY_##wc(R,X); \ + } \ + R##_c = FP_CLS_NAN; \ + } while (0) + +#define __LITTLE_ENDIAN 1234 +#define __BIG_ENDIAN 4321 + +#if defined __ARMEB__ +# define __BYTE_ORDER __BIG_ENDIAN +#else +# define __BYTE_ORDER __LITTLE_ENDIAN +#endif + + +/* Define ALIASNAME as a strong alias for NAME. */ +# define strong_alias(name, aliasname) _strong_alias(name, aliasname) +# define _strong_alias(name, aliasname) \ + extern __typeof (name) aliasname __attribute__ ((alias (#name))); + +#ifdef __ARM_EABI__ +/* Rename functions to their EABI names. */ +/* The comparison functions need wrappers for EABI semantics, so + leave them unmolested. 
*/ +#define __negsf2 __aeabi_fneg +#define __subsf3 __aeabi_fsub +#define __addsf3 __aeabi_fadd +#define __floatunsisf __aeabi_ui2f +#define __floatsisf __aeabi_i2f +#define __floatundisf __aeabi_ul2f +#define __floatdisf __aeabi_l2f +#define __mulsf3 __aeabi_fmul +#define __divsf3 __aeabi_fdiv +#define __unordsf2 __aeabi_fcmpun +#define __fixsfsi __aeabi_f2iz +#define __fixunssfsi __aeabi_f2uiz +#define __fixsfdi __aeabi_f2lz +#define __fixunssfdi __aeabi_f2ulz +#define __floatdisf __aeabi_l2f + +#define __negdf2 __aeabi_dneg +#define __subdf3 __aeabi_dsub +#define __adddf3 __aeabi_dadd +#define __floatunsidf __aeabi_ui2d +#define __floatsidf __aeabi_i2d +#define __extendsfdf2 __aeabi_f2d +#define __truncdfsf2 __aeabi_d2f +#define __floatundidf __aeabi_ul2d +#define __floatdidf __aeabi_l2d +#define __muldf3 __aeabi_dmul +#define __divdf3 __aeabi_ddiv +#define __unorddf2 __aeabi_dcmpun +#define __fixdfsi __aeabi_d2iz +#define __fixunsdfsi __aeabi_d2uiz +#define __fixdfdi __aeabi_d2lz +#define __fixunsdfdi __aeabi_d2ulz +#define __floatdidf __aeabi_l2d + +#endif /* __ARM_EABI__ */ diff --git a/gcc/config/arm/t-arm-elf b/gcc/config/arm/t-arm-elf index b423bbb3597..31ba396b433 100644 --- a/gcc/config/arm/t-arm-elf +++ b/gcc/config/arm/t-arm-elf @@ -1,10 +1,16 @@ LIB1ASMSRC = arm/lib1funcs.asm +# For most CPUs we have an assembly soft-float implementation. +# However this is not true for ARMv6M. Here we want to use the soft-fp C +# implementation. The soft-fp code is only built for ARMv6M. This pulls +# in the asm implementation for other CPUs. LIB1ASMFUNCS = _udivsi3 _divsi3 _umodsi3 _modsi3 _dvmd_tls _bb_init_func \ _call_via_rX _interwork_call_via_rX \ _lshrdi3 _ashrdi3 _ashldi3 \ - _negdf2 _addsubdf3 _muldivdf3 _cmpdf2 _unorddf2 _fixdfsi _fixunsdfsi \ - _truncdfsf2 _negsf2 _addsubsf3 _muldivsf3 _cmpsf2 _unordsf2 \ - _fixsfsi _fixunssfsi _floatdidf _floatdisf _floatundidf _floatundisf + _arm_negdf2 _arm_addsubdf3 _arm_muldivdf3 _arm_cmpdf2 _arm_unorddf2 \ + _arm_fixdfsi _arm_fixunsdfsi \ + _arm_truncdfsf2 _arm_negsf2 _arm_addsubsf3 _arm_muldivsf3 \ + _arm_cmpsf2 _arm_unordsf2 _arm_fixsfsi _arm_fixunssfsi \ + _arm_floatdidf _arm_floatdisf _arm_floatundidf _arm_floatundisf MULTILIB_OPTIONS = marm/mthumb MULTILIB_DIRNAMES = arm thumb diff --git a/gcc/config/arm/t-arm-softfp b/gcc/config/arm/t-arm-softfp new file mode 100644 index 00000000000..4a97747b195 --- /dev/null +++ b/gcc/config/arm/t-arm-softfp @@ -0,0 +1,11 @@ +softfp_float_modes := sf df +softfp_int_modes := si di +softfp_extensions := sfdf +softfp_truncations := dfsf +softfp_machine_header := arm/sfp-machine.h +softfp_exclude_libgcc2 := y +softfp_wrap_start := '\#ifdef __ARM_ARCH_6M__' +softfp_wrap_end := '\#endif' + +# softfp seems to be missing a whole bunch of prototypes. +TARGET_LIBGCC2_CFLAGS += -Wno-missing-prototypes diff --git a/gcc/config/arm/thumb2.md b/gcc/config/arm/thumb2.md index 0efe31f0f6d..ba45c88c2cb 100644 --- a/gcc/config/arm/thumb2.md +++ b/gcc/config/arm/thumb2.md @@ -1,5 +1,5 @@ ;; ARM Thumb-2 Machine Description -;; Copyright (C) 2007 Free Software Foundation, Inc. +;; Copyright (C) 2007, 2008 Free Software Foundation, Inc. ;; Written by CodeSourcery, LLC. ;; ;; This file is part of GCC. 
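The t-arm-softfp fragment above sets softfp_wrap_start and softfp_wrap_end, so every soft-fp C translation unit that libgcc generates is bracketed by the given preprocessor guard: the C routines are compiled only into the ARMv6-M multilib, while other multilibs keep the assembly versions from lib1funcs.asm. A sketch of the shape of one generated unit, with the body elided (illustrative, not copied from a build tree):

#ifdef __ARM_ARCH_6M__		/* emitted from softfp_wrap_start */
/* soft-fp body, e.g. the __addsf3 implementation, built against
   arm/sfp-machine.h above */
#endif				/* emitted from softfp_wrap_end */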
@@ -1131,7 +1131,8 @@ (match_operand:SI 2 "s_register_operand" "r")))] "TARGET_THUMB2 && arm_arch_hwdiv" "sdiv%?\t%0, %1, %2" - [(set_attr "predicable" "yes")] + [(set_attr "predicable" "yes") + (set_attr "insn" "sdiv")] ) (define_insn "udivsi3" @@ -1140,7 +1141,8 @@ (match_operand:SI 2 "s_register_operand" "r")))] "TARGET_THUMB2 && arm_arch_hwdiv" "udiv%?\t%0, %1, %2" - [(set_attr "predicable" "yes")] + [(set_attr "predicable" "yes") + (set_attr "insn" "udiv")] ) (define_insn "*thumb2_cbz" diff --git a/gcc/config/i386/gthr-win32.c b/gcc/config/i386/gthr-win32.c index c87a771ed8f..3810fc0f26c 100644 --- a/gcc/config/i386/gthr-win32.c +++ b/gcc/config/i386/gthr-win32.c @@ -151,6 +151,12 @@ __gthr_win32_mutex_init_function (__gthread_mutex_t *mutex) mutex->sema = CreateSemaphore (NULL, 0, 65535, NULL); } +void +__gthr_win32_mutex_destroy (__gthread_mutex_t *mutex) +{ + CloseHandle ((HANDLE) mutex->sema); +} + int __gthr_win32_mutex_lock (__gthread_mutex_t *mutex) { diff --git a/gcc/config/i386/i386-modes.def b/gcc/config/i386/i386-modes.def index 105d3872b79..a23e8203ff3 100644 --- a/gcc/config/i386/i386-modes.def +++ b/gcc/config/i386/i386-modes.def @@ -17,6 +17,10 @@ You should have received a copy of the GNU General Public License along with GCC; see the file COPYING3. If not see <http://www.gnu.org/licenses/>. */ +/* In 32bit, DI mode uses 32bit registers. Only 4 byte alignment + is needed. */ +ADJUST_ALIGNMENT (DI, (TARGET_64BIT || TARGET_ALIGN_DOUBLE) ? 8 : 4); + /* The x86_64 ABI specifies both XF and TF modes. XFmode is __float80 is IEEE extended; TFmode is __float128 is IEEE quad. */ @@ -75,6 +79,7 @@ VECTOR_MODES (INT, 8); /* V8QI V4HI V2SI */ VECTOR_MODES (INT, 16); /* V16QI V8HI V4SI V2DI */ VECTOR_MODES (FLOAT, 8); /* V4HF V2SF */ VECTOR_MODES (FLOAT, 16); /* V8HF V4SF V2DF */ +VECTOR_MODE (INT, DI, 1); /* V1DI */ VECTOR_MODE (INT, QI, 2); /* V2QI */ VECTOR_MODE (INT, DI, 4); /* V4DI */ VECTOR_MODE (INT, SI, 8); /* V8SI */ diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 5a4456d912b..74b2be23d9b 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -2700,6 +2700,18 @@ override_options (void) target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS; } + /* If stack probes are required, the space used for large function + arguments on the stack must also be probed, so enable + -maccumulate-outgoing-args so this happens in the prologue. */ + if (TARGET_STACK_PROBE + && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)) + { + if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS) + warning (0, "stack probing requires -maccumulate-outgoing-args " + "for correctness"); + target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS; + } + /* For sane SSE instruction set generation we need fcomi instruction. It is safe to enable all CMOVE instructions. 
*/ if (TARGET_SSE) @@ -3826,7 +3838,7 @@ classify_argument (enum machine_mode mode, const_tree type, } /* for V1xx modes, just use the base mode */ - if (VECTOR_MODE_P (mode) + if (VECTOR_MODE_P (mode) && mode != V1DImode && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes) mode = GET_MODE_INNER (mode); @@ -3898,6 +3910,7 @@ classify_argument (enum machine_mode mode, const_tree type, classes[0] = X86_64_SSE_CLASS; classes[1] = X86_64_SSEUP_CLASS; return 2; + case V1DImode: case V2SFmode: case V2SImode: case V4HImode: @@ -4199,6 +4212,7 @@ function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode, case V4HImode: case V2SImode: case V2SFmode: + case V1DImode: if (!type || !AGGREGATE_TYPE_P (type)) { cum->mmx_words += words; @@ -4362,6 +4376,7 @@ function_arg_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode, case V4HImode: case V2SImode: case V2SFmode: + case V1DImode: if (!type || !AGGREGATE_TYPE_P (type)) { if (!TARGET_MMX && !warnedmmx && cum->warn_mmx) @@ -16774,7 +16789,8 @@ ia32_multipass_dfa_lookahead (void) int ix86_constant_alignment (tree exp, int align) { - if (TREE_CODE (exp) == REAL_CST) + if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST + || TREE_CODE (exp) == INTEGER_CST) { if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64) return 64; @@ -17943,11 +17959,11 @@ static const struct builtin_description bdesc_2arg[] = { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, 0 }, { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, 0 }, { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, 0 }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, 0 }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, 0 }, { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, 0 }, { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, 0 }, { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, 0 }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, 0 }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, 0 }, { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, 0 }, { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, 0 }, @@ -17998,25 +18014,6 @@ static const struct builtin_description bdesc_2arg[] = { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, UNKNOWN, 0 }, { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, UNKNOWN, 0 }, - { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, UNKNOWN, 0 }, - { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, UNKNOWN, 0 }, - { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, UNKNOWN, 0 }, - { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, UNKNOWN, 0 }, - { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, UNKNOWN, 0 }, - { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, UNKNOWN, 0 }, - - { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, UNKNOWN, 0 }, - { OPTION_MASK_ISA_MMX, 
CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, UNKNOWN, 0 }, - { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, UNKNOWN, 0 }, - { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, UNKNOWN, 0 }, - { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, UNKNOWN, 0 }, - { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, UNKNOWN, 0 }, - - { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, UNKNOWN, 0 }, - { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, UNKNOWN, 0 }, - { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, UNKNOWN, 0 }, - { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, UNKNOWN, 0 }, - { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, UNKNOWN, 0 }, { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, UNKNOWN, 0 }, @@ -18128,17 +18125,6 @@ static const struct builtin_description bdesc_2arg[] = { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, UNKNOWN, 0 }, { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, UNKNOWN, 0 }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, UNKNOWN, 0 }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, UNKNOWN, 0 }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, UNKNOWN, 0 }, - - { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, UNKNOWN, 0 }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, UNKNOWN, 0 }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, UNKNOWN, 0 }, - - { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, UNKNOWN, 0 }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, UNKNOWN, 0 }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, UNKNOWN, 0 }, { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, UNKNOWN, 0 }, @@ -18200,6 +18186,7 @@ static const struct builtin_description bdesc_2arg[] = static const struct builtin_description bdesc_1arg[] = { + /* SSE */ { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, UNKNOWN, 0 }, { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, UNKNOWN, 0 }, @@ -18216,6 +18203,7 @@ static const struct builtin_description bdesc_1arg[] = { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, UNKNOWN, 0 }, { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, 0 }, + /* SSE2 */ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, UNKNOWN, 0 }, { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, UNKNOWN, 0 }, @@ -18573,6 +18561,8 @@ ix86_init_mmx_sse_builtins (void) tree V16QI_type_node = build_vector_type_for_mode (char_type_node, V16QImode); tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode); + tree V1DI_type_node + = build_vector_type_for_mode (long_long_integer_type_node, V1DImode); tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode); tree V2DI_type_node = build_vector_type_for_mode (long_long_integer_type_node, V2DImode); @@ -18637,14 +18627,13 @@ ix86_init_mmx_sse_builtins (void) tree v4hi_ftype_v4hi_int = build_function_type_list (V4HI_type_node, V4HI_type_node, integer_type_node, 
NULL_TREE); - tree v4hi_ftype_v4hi_di - = build_function_type_list (V4HI_type_node, - V4HI_type_node, long_long_unsigned_type_node, - NULL_TREE); - tree v2si_ftype_v2si_di + tree v2si_ftype_v2si_int = build_function_type_list (V2SI_type_node, - V2SI_type_node, long_long_unsigned_type_node, - NULL_TREE); + V2SI_type_node, integer_type_node, NULL_TREE); + tree v1di_ftype_v1di_int + = build_function_type_list (V1DI_type_node, + V1DI_type_node, integer_type_node, NULL_TREE); + tree void_ftype_void = build_function_type (void_type_node, void_list_node); tree void_ftype_unsigned @@ -18711,10 +18700,9 @@ ix86_init_mmx_sse_builtins (void) tree v2si_ftype_v2si_v2si = build_function_type_list (V2SI_type_node, V2SI_type_node, V2SI_type_node, NULL_TREE); - tree di_ftype_di_di - = build_function_type_list (long_long_unsigned_type_node, - long_long_unsigned_type_node, - long_long_unsigned_type_node, NULL_TREE); + tree v1di_ftype_v1di_v1di + = build_function_type_list (V1DI_type_node, + V1DI_type_node, V1DI_type_node, NULL_TREE); tree di_ftype_di_di_int = build_function_type_list (long_long_unsigned_type_node, @@ -19170,8 +19158,8 @@ ix86_init_mmx_sse_builtins (void) case V2SImode: type = v2si_ftype_v2si_v2si; break; - case DImode: - type = di_ftype_di_di; + case V1DImode: + type = v1di_ftype_v1di_v1di; break; default: @@ -19263,16 +19251,25 @@ ix86_init_mmx_sse_builtins (void) /* Add the remaining MMX insns with somewhat more complicated types. */ def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS); - def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW); - def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD); - def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ); - def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW); - def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD); - def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ); - - def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW); - def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD); + def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psllwi", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSLLWI); + def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_pslldi", v2si_ftype_v2si_int, IX86_BUILTIN_PSLLDI); + def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psllqi", v1di_ftype_v1di_int, IX86_BUILTIN_PSLLQI); + def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PSLLW); + def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_v2si, IX86_BUILTIN_PSLLD); + def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psllq", v1di_ftype_v1di_v1di, IX86_BUILTIN_PSLLQ); + + def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrlwi", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSRLWI); + def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrldi", v2si_ftype_v2si_int, IX86_BUILTIN_PSRLDI); + def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrlqi", v1di_ftype_v1di_int, IX86_BUILTIN_PSRLQI); + def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PSRLW); + def_builtin_const (OPTION_MASK_ISA_MMX, 
"__builtin_ia32_psrld", v2si_ftype_v2si_v2si, IX86_BUILTIN_PSRLD); + def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrlq", v1di_ftype_v1di_v1di, IX86_BUILTIN_PSRLQ); + + def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrawi", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSRAWI); + def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psradi", v2si_ftype_v2si_int, IX86_BUILTIN_PSRADI); + def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PSRAW); + def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_v2si, IX86_BUILTIN_PSRAD); def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW); def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD); @@ -20817,6 +20814,39 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, emit_insn (pat); return target; + case IX86_BUILTIN_PSLLW: + case IX86_BUILTIN_PSLLWI: + icode = CODE_FOR_mmx_ashlv4hi3; + goto do_pshift; + case IX86_BUILTIN_PSLLD: + case IX86_BUILTIN_PSLLDI: + icode = CODE_FOR_mmx_ashlv2si3; + goto do_pshift; + case IX86_BUILTIN_PSLLQ: + case IX86_BUILTIN_PSLLQI: + icode = CODE_FOR_mmx_ashlv1di3; + goto do_pshift; + case IX86_BUILTIN_PSRAW: + case IX86_BUILTIN_PSRAWI: + icode = CODE_FOR_mmx_ashrv4hi3; + goto do_pshift; + case IX86_BUILTIN_PSRAD: + case IX86_BUILTIN_PSRADI: + icode = CODE_FOR_mmx_ashrv2si3; + goto do_pshift; + case IX86_BUILTIN_PSRLW: + case IX86_BUILTIN_PSRLWI: + icode = CODE_FOR_mmx_lshrv4hi3; + goto do_pshift; + case IX86_BUILTIN_PSRLD: + case IX86_BUILTIN_PSRLDI: + icode = CODE_FOR_mmx_lshrv2si3; + goto do_pshift; + case IX86_BUILTIN_PSRLQ: + case IX86_BUILTIN_PSRLQI: + icode = CODE_FOR_mmx_lshrv1di3; + goto do_pshift; + case IX86_BUILTIN_PSLLW128: case IX86_BUILTIN_PSLLWI128: icode = CODE_FOR_ashlv8hi3; diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index f2429846691..98cb72ac921 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -1120,8 +1120,9 @@ do { \ ((MODE) == V2SFmode || (MODE) == SFmode) #define VALID_MMX_REG_MODE(MODE) \ - ((MODE) == DImode || (MODE) == V8QImode || (MODE) == V4HImode \ - || (MODE) == V2SImode || (MODE) == SImode) + ((MODE == V1DImode) || (MODE) == DImode \ + || (MODE) == V2SImode || (MODE) == SImode \ + || (MODE) == V4HImode || (MODE) == V8QImode) /* ??? No autovectorization into MMX or 3DNOW until we can reliably place emms and femms instructions. 
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 92a37280f5f..eb942d60d9c 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -95,7 +95,6 @@ (UNSPEC_RCP 45) (UNSPEC_RSQRT 46) (UNSPEC_SFENCE 47) - (UNSPEC_NOP 48) ; prevents combiner cleverness (UNSPEC_PFRCP 49) (UNSPEC_PFRCPIT1 40) (UNSPEC_PFRCPIT2 41) @@ -19873,7 +19872,7 @@ (set (reg:SI SP_REG) (minus:SI (reg:SI SP_REG) (match_dup 0))) (clobber (reg:CC FLAGS_REG))] "!TARGET_64BIT && TARGET_STACK_PROBE" - "call\t__alloca" + "call\t___chkstk" [(set_attr "type" "multi") (set_attr "length" "5")]) diff --git a/gcc/config/i386/mmintrin.h b/gcc/config/i386/mmintrin.h index a04109be616..94800ad688e 100644 --- a/gcc/config/i386/mmintrin.h +++ b/gcc/config/i386/mmintrin.h @@ -42,6 +42,7 @@ typedef int __m64 __attribute__ ((__vector_size__ (8), __may_alias__)); typedef int __v2si __attribute__ ((__vector_size__ (8))); typedef short __v4hi __attribute__ ((__vector_size__ (8))); typedef char __v8qi __attribute__ ((__vector_size__ (8))); +typedef long long __v1di __attribute__ ((__vector_size__ (8))); /* Empty the multimedia state. */ static __inline void __attribute__((__always_inline__, __artificial__)) @@ -309,7 +310,7 @@ _m_paddd (__m64 __m1, __m64 __m2) static __inline __m64 __attribute__((__always_inline__, __artificial__)) _mm_add_si64 (__m64 __m1, __m64 __m2) { - return (__m64) __builtin_ia32_paddq ((long long)__m1, (long long)__m2); + return (__m64) __builtin_ia32_paddq ((__v1di)__m1, (__v1di)__m2); } #endif @@ -413,7 +414,7 @@ _m_psubd (__m64 __m1, __m64 __m2) static __inline __m64 __attribute__((__always_inline__, __artificial__)) _mm_sub_si64 (__m64 __m1, __m64 __m2) { - return (__m64) __builtin_ia32_psubq ((long long)__m1, (long long)__m2); + return (__m64) __builtin_ia32_psubq ((__v1di)__m1, (__v1di)__m2); } #endif @@ -520,7 +521,7 @@ _m_pmullw (__m64 __m1, __m64 __m2) static __inline __m64 __attribute__((__always_inline__, __artificial__)) _mm_sll_pi16 (__m64 __m, __m64 __count) { - return (__m64) __builtin_ia32_psllw ((__v4hi)__m, (long long)__count); + return (__m64) __builtin_ia32_psllw ((__v4hi)__m, (__v4hi)__count); } static __inline __m64 __attribute__((__always_inline__, __artificial__)) @@ -532,7 +533,7 @@ _m_psllw (__m64 __m, __m64 __count) static __inline __m64 __attribute__((__always_inline__, __artificial__)) _mm_slli_pi16 (__m64 __m, int __count) { - return (__m64) __builtin_ia32_psllw ((__v4hi)__m, __count); + return (__m64) __builtin_ia32_psllwi ((__v4hi)__m, __count); } static __inline __m64 __attribute__((__always_inline__, __artificial__)) @@ -545,7 +546,7 @@ _m_psllwi (__m64 __m, int __count) static __inline __m64 __attribute__((__always_inline__, __artificial__)) _mm_sll_pi32 (__m64 __m, __m64 __count) { - return (__m64) __builtin_ia32_pslld ((__v2si)__m, (long long)__count); + return (__m64) __builtin_ia32_pslld ((__v2si)__m, (__v2si)__count); } static __inline __m64 __attribute__((__always_inline__, __artificial__)) @@ -557,7 +558,7 @@ _m_pslld (__m64 __m, __m64 __count) static __inline __m64 __attribute__((__always_inline__, __artificial__)) _mm_slli_pi32 (__m64 __m, int __count) { - return (__m64) __builtin_ia32_pslld ((__v2si)__m, __count); + return (__m64) __builtin_ia32_pslldi ((__v2si)__m, __count); } static __inline __m64 __attribute__((__always_inline__, __artificial__)) @@ -570,7 +571,7 @@ _m_pslldi (__m64 __m, int __count) static __inline __m64 __attribute__((__always_inline__, __artificial__)) _mm_sll_si64 (__m64 __m, __m64 __count) { - return (__m64)
__builtin_ia32_psllq ((long long)__m, (long long)__count); + return (__m64) __builtin_ia32_psllq ((__v1di)__m, (__v1di)__count); } static __inline __m64 __attribute__((__always_inline__, __artificial__)) @@ -582,7 +583,7 @@ _m_psllq (__m64 __m, __m64 __count) static __inline __m64 __attribute__((__always_inline__, __artificial__)) _mm_slli_si64 (__m64 __m, int __count) { - return (__m64) __builtin_ia32_psllq ((long long)__m, (long long)__count); + return (__m64) __builtin_ia32_psllqi ((__v1di)__m, __count); } static __inline __m64 __attribute__((__always_inline__, __artificial__)) @@ -595,7 +596,7 @@ _m_psllqi (__m64 __m, int __count) static __inline __m64 __attribute__((__always_inline__, __artificial__)) _mm_sra_pi16 (__m64 __m, __m64 __count) { - return (__m64) __builtin_ia32_psraw ((__v4hi)__m, (long long)__count); + return (__m64) __builtin_ia32_psraw ((__v4hi)__m, (__v4hi)__count); } static __inline __m64 __attribute__((__always_inline__, __artificial__)) @@ -607,7 +608,7 @@ _m_psraw (__m64 __m, __m64 __count) static __inline __m64 __attribute__((__always_inline__, __artificial__)) _mm_srai_pi16 (__m64 __m, int __count) { - return (__m64) __builtin_ia32_psraw ((__v4hi)__m, __count); + return (__m64) __builtin_ia32_psrawi ((__v4hi)__m, __count); } static __inline __m64 __attribute__((__always_inline__, __artificial__)) @@ -620,7 +621,7 @@ _m_psrawi (__m64 __m, int __count) static __inline __m64 __attribute__((__always_inline__, __artificial__)) _mm_sra_pi32 (__m64 __m, __m64 __count) { - return (__m64) __builtin_ia32_psrad ((__v2si)__m, (long long)__count); + return (__m64) __builtin_ia32_psrad ((__v2si)__m, (__v2si)__count); } static __inline __m64 __attribute__((__always_inline__, __artificial__)) @@ -632,7 +633,7 @@ _m_psrad (__m64 __m, __m64 __count) static __inline __m64 __attribute__((__always_inline__, __artificial__)) _mm_srai_pi32 (__m64 __m, int __count) { - return (__m64) __builtin_ia32_psrad ((__v2si)__m, __count); + return (__m64) __builtin_ia32_psradi ((__v2si)__m, __count); } static __inline __m64 __attribute__((__always_inline__, __artificial__)) @@ -645,7 +646,7 @@ _m_psradi (__m64 __m, int __count) static __inline __m64 __attribute__((__always_inline__, __artificial__)) _mm_srl_pi16 (__m64 __m, __m64 __count) { - return (__m64) __builtin_ia32_psrlw ((__v4hi)__m, (long long)__count); + return (__m64) __builtin_ia32_psrlw ((__v4hi)__m, (__v4hi)__count); } static __inline __m64 __attribute__((__always_inline__, __artificial__)) @@ -657,7 +658,7 @@ _m_psrlw (__m64 __m, __m64 __count) static __inline __m64 __attribute__((__always_inline__, __artificial__)) _mm_srli_pi16 (__m64 __m, int __count) { - return (__m64) __builtin_ia32_psrlw ((__v4hi)__m, __count); + return (__m64) __builtin_ia32_psrlwi ((__v4hi)__m, __count); } static __inline __m64 __attribute__((__always_inline__, __artificial__)) @@ -670,7 +671,7 @@ _m_psrlwi (__m64 __m, int __count) static __inline __m64 __attribute__((__always_inline__, __artificial__)) _mm_srl_pi32 (__m64 __m, __m64 __count) { - return (__m64) __builtin_ia32_psrld ((__v2si)__m, (long long)__count); + return (__m64) __builtin_ia32_psrld ((__v2si)__m, (__v2si)__count); } static __inline __m64 __attribute__((__always_inline__, __artificial__)) @@ -682,7 +683,7 @@ _m_psrld (__m64 __m, __m64 __count) static __inline __m64 __attribute__((__always_inline__, __artificial__)) _mm_srli_pi32 (__m64 __m, int __count) { - return (__m64) __builtin_ia32_psrld ((__v2si)__m, __count); + return (__m64) __builtin_ia32_psrldi ((__v2si)__m, __count); } static 
__inline __m64 __attribute__((__always_inline__, __artificial__)) @@ -695,7 +696,7 @@ _m_psrldi (__m64 __m, int __count) static __inline __m64 __attribute__((__always_inline__, __artificial__)) _mm_srl_si64 (__m64 __m, __m64 __count) { - return (__m64) __builtin_ia32_psrlq ((long long)__m, (long long)__count); + return (__m64) __builtin_ia32_psrlq ((__v1di)__m, (__v1di)__count); } static __inline __m64 __attribute__((__always_inline__, __artificial__)) @@ -707,7 +708,7 @@ _m_psrlq (__m64 __m, __m64 __count) static __inline __m64 __attribute__((__always_inline__, __artificial__)) _mm_srli_si64 (__m64 __m, int __count) { - return (__m64) __builtin_ia32_psrlq ((long long)__m, (long long)__count); + return (__m64) __builtin_ia32_psrlqi ((__v1di)__m, __count); } static __inline __m64 __attribute__((__always_inline__, __artificial__)) diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md index 3371161f82f..1d2a41dd114 100644 --- a/gcc/config/i386/mmx.md +++ b/gcc/config/i386/mmx.md @@ -32,16 +32,18 @@ ;; 8 byte integral modes handled by MMX (and by extension, SSE) (define_mode_iterator MMXMODEI [V8QI V4HI V2SI]) +(define_mode_iterator MMXMODEI8 [V8QI V4HI V2SI V1DI]) ;; All 8-byte vector modes handled by MMX -(define_mode_iterator MMXMODE [V8QI V4HI V2SI V2SF]) +(define_mode_iterator MMXMODE [V8QI V4HI V2SI V1DI V2SF]) ;; Mix-n-match (define_mode_iterator MMXMODE12 [V8QI V4HI]) (define_mode_iterator MMXMODE24 [V4HI V2SI]) +(define_mode_iterator MMXMODE248 [V4HI V2SI V1DI]) ;; Mapping from integer vector mode to mnemonic suffix -(define_mode_attr mmxvecsize [(V8QI "b") (V4HI "w") (V2SI "d") (DI "q")]) +(define_mode_attr mmxvecsize [(V8QI "b") (V4HI "w") (V2SI "d") (V1DI "q")]) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; @@ -53,8 +55,8 @@ ;; This is essential for maintaining stable calling conventions. 
(define_expand "mov<mode>" - [(set (match_operand:MMXMODEI 0 "nonimmediate_operand" "") - (match_operand:MMXMODEI 1 "nonimmediate_operand" ""))] + [(set (match_operand:MMXMODEI8 0 "nonimmediate_operand" "") + (match_operand:MMXMODEI8 1 "nonimmediate_operand" ""))] "TARGET_MMX" { ix86_expand_vector_move (<MODE>mode, operands); @@ -62,9 +64,9 @@ }) (define_insn "*mov<mode>_internal_rex64" - [(set (match_operand:MMXMODEI 0 "nonimmediate_operand" + [(set (match_operand:MMXMODEI8 0 "nonimmediate_operand" "=rm,r,!y,!y ,m ,!y,Y2,x,x ,m,r,x") - (match_operand:MMXMODEI 1 "vector_move_operand" + (match_operand:MMXMODEI8 1 "vector_move_operand" "Cr ,m,C ,!ym,!y,Y2,!y,C,xm,x,x,r"))] "TARGET_64BIT && TARGET_MMX && !(MEM_P (operands[0]) && MEM_P (operands[1]))" @@ -86,9 +88,9 @@ (set_attr "mode" "DI")]) (define_insn "*mov<mode>_internal" - [(set (match_operand:MMXMODEI 0 "nonimmediate_operand" + [(set (match_operand:MMXMODEI8 0 "nonimmediate_operand" "=!y,!y ,m ,!y ,*Y2,*Y2,*Y2 ,m ,*x,*x,*x,m ,?r ,?m") - (match_operand:MMXMODEI 1 "vector_move_operand" + (match_operand:MMXMODEI8 1 "vector_move_operand" "C ,!ym,!y,*Y2,!y ,C ,*Y2m,*Y2,C ,*x,m ,*x,irm,r"))] "TARGET_MMX && !(MEM_P (operands[0]) && MEM_P (operands[1]))" @@ -557,26 +559,16 @@ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (define_insn "mmx_add<mode>3" - [(set (match_operand:MMXMODEI 0 "register_operand" "=y") - (plus:MMXMODEI - (match_operand:MMXMODEI 1 "nonimmediate_operand" "%0") - (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")))] - "TARGET_MMX && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)" + [(set (match_operand:MMXMODEI8 0 "register_operand" "=y") + (plus:MMXMODEI8 + (match_operand:MMXMODEI8 1 "nonimmediate_operand" "%0") + (match_operand:MMXMODEI8 2 "nonimmediate_operand" "ym")))] + "(TARGET_MMX || (TARGET_SSE2 && <MODE>mode == V1DImode)) + && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)" "padd<mmxvecsize>\t{%2, %0|%0, %2}" [(set_attr "type" "mmxadd") (set_attr "mode" "DI")]) -(define_insn "mmx_adddi3" - [(set (match_operand:DI 0 "register_operand" "=y") - (unspec:DI - [(plus:DI (match_operand:DI 1 "nonimmediate_operand" "%0") - (match_operand:DI 2 "nonimmediate_operand" "ym"))] - UNSPEC_NOP))] - "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, DImode, operands)" - "paddq\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxadd") - (set_attr "mode" "DI")]) - (define_insn "mmx_ssadd<mode>3" [(set (match_operand:MMXMODE12 0 "register_operand" "=y") (ss_plus:MMXMODE12 @@ -598,26 +590,15 @@ (set_attr "mode" "DI")]) (define_insn "mmx_sub<mode>3" - [(set (match_operand:MMXMODEI 0 "register_operand" "=y") - (minus:MMXMODEI - (match_operand:MMXMODEI 1 "register_operand" "0") - (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")))] - "TARGET_MMX" + [(set (match_operand:MMXMODEI8 0 "register_operand" "=y") + (minus:MMXMODEI8 + (match_operand:MMXMODEI8 1 "register_operand" "0") + (match_operand:MMXMODEI8 2 "nonimmediate_operand" "ym")))] + "(TARGET_MMX || (TARGET_SSE2 && <MODE>mode == V1DImode))" "psub<mmxvecsize>\t{%2, %0|%0, %2}" [(set_attr "type" "mmxadd") (set_attr "mode" "DI")]) -(define_insn "mmx_subdi3" - [(set (match_operand:DI 0 "register_operand" "=y") - (unspec:DI - [(minus:DI (match_operand:DI 1 "register_operand" "0") - (match_operand:DI 2 "nonimmediate_operand" "ym"))] - UNSPEC_NOP))] - "TARGET_SSE2" - "psubq\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxadd") - (set_attr "mode" "DI")]) - (define_insn "mmx_sssub<mode>3" [(set (match_operand:MMXMODE12 0 "register_operand" "=y") (ss_minus:MMXMODE12 @@ 
-778,54 +759,32 @@ [(set (match_operand:MMXMODE24 0 "register_operand" "=y") (ashiftrt:MMXMODE24 (match_operand:MMXMODE24 1 "register_operand" "0") - (match_operand:DI 2 "nonmemory_operand" "yi")))] + (match_operand:SI 2 "nonmemory_operand" "yN")))] "TARGET_MMX" "psra<mmxvecsize>\t{%2, %0|%0, %2}" [(set_attr "type" "mmxshft") (set_attr "mode" "DI")]) (define_insn "mmx_lshr<mode>3" - [(set (match_operand:MMXMODE24 0 "register_operand" "=y") - (lshiftrt:MMXMODE24 - (match_operand:MMXMODE24 1 "register_operand" "0") - (match_operand:DI 2 "nonmemory_operand" "yi")))] + [(set (match_operand:MMXMODE248 0 "register_operand" "=y") + (lshiftrt:MMXMODE248 + (match_operand:MMXMODE248 1 "register_operand" "0") + (match_operand:SI 2 "nonmemory_operand" "yN")))] "TARGET_MMX" "psrl<mmxvecsize>\t{%2, %0|%0, %2}" [(set_attr "type" "mmxshft") (set_attr "mode" "DI")]) -(define_insn "mmx_lshrdi3" - [(set (match_operand:DI 0 "register_operand" "=y") - (unspec:DI - [(lshiftrt:DI (match_operand:DI 1 "register_operand" "0") - (match_operand:DI 2 "nonmemory_operand" "yi"))] - UNSPEC_NOP))] - "TARGET_MMX" - "psrlq\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxshft") - (set_attr "mode" "DI")]) - (define_insn "mmx_ashl<mode>3" - [(set (match_operand:MMXMODE24 0 "register_operand" "=y") - (ashift:MMXMODE24 - (match_operand:MMXMODE24 1 "register_operand" "0") - (match_operand:DI 2 "nonmemory_operand" "yi")))] + [(set (match_operand:MMXMODE248 0 "register_operand" "=y") + (ashift:MMXMODE248 + (match_operand:MMXMODE248 1 "register_operand" "0") + (match_operand:SI 2 "nonmemory_operand" "yN")))] "TARGET_MMX" "psll<mmxvecsize>\t{%2, %0|%0, %2}" [(set_attr "type" "mmxshft") (set_attr "mode" "DI")]) -(define_insn "mmx_ashldi3" - [(set (match_operand:DI 0 "register_operand" "=y") - (unspec:DI - [(ashift:DI (match_operand:DI 1 "register_operand" "0") - (match_operand:DI 2 "nonmemory_operand" "yi"))] - UNSPEC_NOP))] - "TARGET_MMX" - "psllq\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxshft") - (set_attr "mode" "DI")]) - ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; ;; Parallel integral comparisons diff --git a/gcc/config/i386/smmintrin.h b/gcc/config/i386/smmintrin.h index e5b0ae04c0f..08bfd107232 100644 --- a/gcc/config/i386/smmintrin.h +++ b/gcc/config/i386/smmintrin.h @@ -470,30 +470,30 @@ _mm_stream_load_si128 (__m128i *__X) #ifdef __SSE4_2__ /* These macros specify the source data format. */ -#define SIDD_UBYTE_OPS 0x00 -#define SIDD_UWORD_OPS 0x01 -#define SIDD_SBYTE_OPS 0x02 -#define SIDD_SWORD_OPS 0x03 +#define _SIDD_UBYTE_OPS 0x00 +#define _SIDD_UWORD_OPS 0x01 +#define _SIDD_SBYTE_OPS 0x02 +#define _SIDD_SWORD_OPS 0x03 /* These macros specify the comparison operation. */ -#define SIDD_CMP_EQUAL_ANY 0x00 -#define SIDD_CMP_RANGES 0x04 -#define SIDD_CMP_EQUAL_EACH 0x08 -#define SIDD_CMP_EQUAL_ORDERED 0x0c +#define _SIDD_CMP_EQUAL_ANY 0x00 +#define _SIDD_CMP_RANGES 0x04 +#define _SIDD_CMP_EQUAL_EACH 0x08 +#define _SIDD_CMP_EQUAL_ORDERED 0x0c /* These macros specify the polarity. */ -#define SIDD_POSITIVE_POLARITY 0x00 -#define SIDD_NEGATIVE_POLARITY 0x10 -#define SIDD_MASKED_POSITIVE_POLARITY 0x20 -#define SIDD_MASKED_NEGATIVE_POLARITY 0x30 +#define _SIDD_POSITIVE_POLARITY 0x00 +#define _SIDD_NEGATIVE_POLARITY 0x10 +#define _SIDD_MASKED_POSITIVE_POLARITY 0x20 +#define _SIDD_MASKED_NEGATIVE_POLARITY 0x30 /* These macros specify the output selection in _mm_cmpXstri ().
*/ -#define SIDD_LEAST_SIGNIFICANT 0x00 -#define SIDD_MOST_SIGNIFICANT 0x40 +#define _SIDD_LEAST_SIGNIFICANT 0x00 +#define _SIDD_MOST_SIGNIFICANT 0x40 /* These macros specify the output selection in _mm_cmpXstrm (). */ -#define SIDD_BIT_MASK 0x00 -#define SIDD_UNIT_MASK 0x40 +#define _SIDD_BIT_MASK 0x00 +#define _SIDD_UNIT_MASK 0x40 /* Intrinsics for text/string processing. */ diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 97250dbd2ed..8ea5bc0048b 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -5969,7 +5969,7 @@ (mult:V8HI (zero_extend:V8HI (vec_select:V4QI - (match_operand:V16QI 1 "nonimmediate_operand" "%0") + (match_operand:V16QI 1 "nonimmediate_operand" "0") (parallel [(const_int 0) (const_int 2) (const_int 4) @@ -6023,7 +6023,7 @@ (mult:V4HI (zero_extend:V4HI (vec_select:V4QI - (match_operand:V8QI 1 "nonimmediate_operand" "%0") + (match_operand:V8QI 1 "nonimmediate_operand" "0") (parallel [(const_int 0) (const_int 2) (const_int 4) diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md index 2261a9369f3..95c588ca070 100644 --- a/gcc/config/rs6000/altivec.md +++ b/gcc/config/rs6000/altivec.md @@ -64,7 +64,6 @@ (UNSPEC_VPKUWUS 102) (UNSPEC_VPKSWUS 103) (UNSPEC_VRL 104) - (UNSPEC_VSL 107) (UNSPEC_VSLV4SI 110) (UNSPEC_VSLO 111) (UNSPEC_VSR 118) @@ -576,7 +575,7 @@ /* Generate [-0.0, -0.0, -0.0, -0.0]. */ neg0 = gen_reg_rtx (V4SImode); emit_insn (gen_altivec_vspltisw (neg0, constm1_rtx)); - emit_insn (gen_altivec_vslw (neg0, neg0, neg0)); + emit_insn (gen_ashlv4si3 (neg0, neg0, neg0)); /* Use the multiply-add. */ emit_insn (gen_altivec_vmaddfp (operands[0], operands[1], operands[2], @@ -635,7 +634,7 @@ high_product = gen_reg_rtx (V4SImode); emit_insn (gen_altivec_vmsumuhm (high_product, one, small_swap, zero)); - emit_insn (gen_altivec_vslw (high_product, high_product, sixteen)); + emit_insn (gen_ashlv4si3 (high_product, high_product, sixteen)); emit_insn (gen_addv4si3 (operands[0], high_product, low_product)); @@ -1221,15 +1220,6 @@ "vrl<VI_char> %0,%1,%2" [(set_attr "type" "vecsimple")]) -(define_insn "altivec_vsl<VI_char>" - [(set (match_operand:VI 0 "register_operand" "=v") - (unspec:VI [(match_operand:VI 1 "register_operand" "v") - (match_operand:VI 2 "register_operand" "v")] - UNSPEC_VSL))] - "TARGET_ALTIVEC" - "vsl<VI_char> %0,%1,%2" - [(set_attr "type" "vecsimple")]) - (define_insn "altivec_vsl" [(set (match_operand:V4SI 0 "register_operand" "=v") (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v") @@ -1248,6 +1238,14 @@ "vslo %0,%1,%2" [(set_attr "type" "vecperm")]) +(define_insn "ashl<mode>3" + [(set (match_operand:VI 0 "register_operand" "=v") + (ashift:VI (match_operand:VI 1 "register_operand" "v") + (match_operand:VI 2 "register_operand" "v") ))] + "TARGET_ALTIVEC" + "vsl<VI_char> %0,%1,%2" + [(set_attr "type" "vecsimple")]) + (define_insn "lshr<mode>3" [(set (match_operand:VI 0 "register_operand" "=v") (lshiftrt:VI (match_operand:VI 1 "register_operand" "v") @@ -2039,7 +2037,7 @@ [(set (match_dup 2) (vec_duplicate:V4SI (const_int -1))) (set (match_dup 3) - (unspec:V4SI [(match_dup 2) (match_dup 2)] UNSPEC_VSL)) + (ashift:V4SI (match_dup 2) (match_dup 2))) (set (match_operand:V4SF 0 "register_operand" "=v") (and:V4SF (not:V4SF (subreg:V4SF (match_dup 3) 0)) (match_operand:V4SF 1 "register_operand" "v")))] @@ -2642,7 +2640,7 @@ /* Generate [-0.0, -0.0, -0.0, -0.0]. 
*/ neg0 = gen_reg_rtx (V4SImode); emit_insn (gen_altivec_vspltisw (neg0, constm1_rtx)); - emit_insn (gen_altivec_vslw (neg0, neg0, neg0)); + emit_insn (gen_ashlv4si3 (neg0, neg0, neg0)); /* XOR */ emit_insn (gen_xorv4sf3 (operands[0], diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 30f226bf46e..593408f56d5 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -732,7 +732,6 @@ static const char *rs6000_invalid_within_doloop (const_rtx); static rtx rs6000_generate_compare (enum rtx_code); static void rs6000_emit_stack_tie (void); static void rs6000_frame_related (rtx, rtx, HOST_WIDE_INT, rtx, rtx); -static rtx spe_synthesize_frame_save (rtx); static bool spe_func_has_64bit_regs_p (void); static void emit_frame_save (rtx, rtx, enum machine_mode, unsigned int, int, HOST_WIDE_INT); @@ -3616,6 +3615,7 @@ rs6000_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, && GET_CODE (XEXP (x, 1)) == CONST_INT && (unsigned HOST_WIDE_INT) (INTVAL (XEXP (x, 1)) + 0x8000) >= 0x10000 && !(SPE_VECTOR_MODE (mode) + || ALTIVEC_VECTOR_MODE (mode) || (TARGET_E500_DOUBLE && (mode == DFmode || mode == TFmode || mode == DImode)))) { @@ -3633,11 +3633,12 @@ rs6000_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, && GET_MODE_NUNITS (mode) == 1 && ((TARGET_HARD_FLOAT && TARGET_FPRS) || TARGET_POWERPC64 - || (((mode != DImode && mode != DFmode && mode != DDmode) - || TARGET_E500_DOUBLE) - && mode != TFmode && mode != TDmode)) + || ((mode != DImode && mode != DFmode && mode != DDmode) + || TARGET_E500_DOUBLE)) && (TARGET_POWERPC64 || mode != DImode) - && mode != TImode) + && mode != TImode + && mode != TFmode + && mode != TDmode) { return gen_rtx_PLUS (Pmode, XEXP (x, 0), force_reg (Pmode, force_operand (XEXP (x, 1), 0))); @@ -7089,9 +7090,9 @@ static struct builtin_description bdesc_2arg[] = { MASK_ALTIVEC, CODE_FOR_altivec_vrlb, "__builtin_altivec_vrlb", ALTIVEC_BUILTIN_VRLB }, { MASK_ALTIVEC, CODE_FOR_altivec_vrlh, "__builtin_altivec_vrlh", ALTIVEC_BUILTIN_VRLH }, { MASK_ALTIVEC, CODE_FOR_altivec_vrlw, "__builtin_altivec_vrlw", ALTIVEC_BUILTIN_VRLW }, - { MASK_ALTIVEC, CODE_FOR_altivec_vslb, "__builtin_altivec_vslb", ALTIVEC_BUILTIN_VSLB }, - { MASK_ALTIVEC, CODE_FOR_altivec_vslh, "__builtin_altivec_vslh", ALTIVEC_BUILTIN_VSLH }, - { MASK_ALTIVEC, CODE_FOR_altivec_vslw, "__builtin_altivec_vslw", ALTIVEC_BUILTIN_VSLW }, + { MASK_ALTIVEC, CODE_FOR_ashlv16qi3, "__builtin_altivec_vslb", ALTIVEC_BUILTIN_VSLB }, + { MASK_ALTIVEC, CODE_FOR_ashlv8hi3, "__builtin_altivec_vslh", ALTIVEC_BUILTIN_VSLH }, + { MASK_ALTIVEC, CODE_FOR_ashlv4si3, "__builtin_altivec_vslw", ALTIVEC_BUILTIN_VSLW }, { MASK_ALTIVEC, CODE_FOR_altivec_vsl, "__builtin_altivec_vsl", ALTIVEC_BUILTIN_VSL }, { MASK_ALTIVEC, CODE_FOR_altivec_vslo, "__builtin_altivec_vslo", ALTIVEC_BUILTIN_VSLO }, { MASK_ALTIVEC, CODE_FOR_altivec_vspltb, "__builtin_altivec_vspltb", ALTIVEC_BUILTIN_VSPLTB }, @@ -13902,6 +13903,9 @@ rs6000_expand_compare_and_swapqhi (rtx dst, rtx mem, rtx oldval, rtx newval) emit_insn (gen_sync_compare_and_swapqhi_internal (wdst, mask, oldval, newval, mem)); + /* Shift the result back. 
*/ + emit_insn (gen_lshrsi3 (wdst, wdst, shift)); + emit_move_insn (dst, gen_lowpart (mode, wdst)); } @@ -15386,77 +15390,12 @@ rs6000_frame_related (rtx insn, rtx reg, HOST_WIDE_INT val, } } - if (TARGET_SPE) - real = spe_synthesize_frame_save (real); - RTX_FRAME_RELATED_P (insn) = 1; REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, real, REG_NOTES (insn)); } -/* Given an SPE frame note, return a PARALLEL of SETs with the - original note, plus a synthetic register save. */ - -static rtx -spe_synthesize_frame_save (rtx real) -{ - rtx synth, offset, reg, real2; - - if (GET_CODE (real) != SET - || GET_MODE (SET_SRC (real)) != V2SImode) - return real; - - /* For the SPE, registers saved in 64-bits, get a PARALLEL for their - frame related note. The parallel contains a set of the register - being saved, and another set to a synthetic register (n+1200). - This is so we can differentiate between 64-bit and 32-bit saves. - Words cannot describe this nastiness. */ - - gcc_assert (GET_CODE (SET_DEST (real)) == MEM - && GET_CODE (XEXP (SET_DEST (real), 0)) == PLUS - && GET_CODE (SET_SRC (real)) == REG); - - /* Transform: - (set (mem (plus (reg x) (const y))) - (reg z)) - into: - (set (mem (plus (reg x) (const y+4))) - (reg z+1200)) - */ - - real2 = copy_rtx (real); - PUT_MODE (SET_DEST (real2), SImode); - reg = SET_SRC (real2); - real2 = replace_rtx (real2, reg, gen_rtx_REG (SImode, REGNO (reg))); - synth = copy_rtx (real2); - - if (BYTES_BIG_ENDIAN) - { - offset = XEXP (XEXP (SET_DEST (real2), 0), 1); - real2 = replace_rtx (real2, offset, GEN_INT (INTVAL (offset) + 4)); - } - - reg = SET_SRC (synth); - - synth = replace_rtx (synth, reg, - gen_rtx_REG (SImode, REGNO (reg) + 1200)); - - offset = XEXP (XEXP (SET_DEST (synth), 0), 1); - synth = replace_rtx (synth, offset, - GEN_INT (INTVAL (offset) - + (BYTES_BIG_ENDIAN ? 0 : 4))); - - RTX_FRAME_RELATED_P (synth) = 1; - RTX_FRAME_RELATED_P (real2) = 1; - if (BYTES_BIG_ENDIAN) - real = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, synth, real2)); - else - real = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, real2, synth)); - - return real; -} - /* Returns an insn that has a vrsave set operation with the appropriate CLOBBERs. */
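For the rs6000_expand_compare_and_swapqhi hunk above: the word-sized compare-and-swap leaves the loaded word in wdst, so the added gen_lshrsi3 call shifts the watched subword back down to bit 0 before gen_lowpart extracts it. A rough C model of the whole mask-and-shift scheme, assuming GCC's __sync_val_compare_and_swap builtin; cas_u16 and its control flow are illustrative, not code the compiler emits:

#include <stdint.h>

/* A 16-bit CAS built from a 32-bit CAS on the containing word, mirroring
   the mask/shift sequence around sync_compare_and_swapqhi_internal.  */
static uint16_t
cas_u16 (uint16_t *p, uint16_t oldval, uint16_t newval)
{
  uintptr_t addr = (uintptr_t) p;
  uint32_t *wp = (uint32_t *) (addr & ~(uintptr_t) 3); /* containing word */
  union { uint32_t w; uint8_t b[4]; } probe = { 0x01020304 };
  unsigned shift = (unsigned) (probe.b[0] == 1          /* big-endian?  */
                               ? 2 - (addr & 2) : (addr & 2)) * 8;
  uint32_t mask = (uint32_t) 0xffff << shift;

  for (;;)
    {
      uint32_t word = *wp;
      uint32_t expect = (word & ~mask) | ((uint32_t) oldval << shift);
      uint32_t desire = (word & ~mask) | ((uint32_t) newval << shift);
      uint32_t got = __sync_val_compare_and_swap (wp, expect, desire);
      if (got == expect)
        return oldval;                       /* swap succeeded */
      if ((got & mask) != ((uint32_t) oldval << shift))
        /* The halfword itself differed: shift the result back, as the
           new gen_lshrsi3 call does, and hand it to the caller.  */
        return (uint16_t) ((got & mask) >> shift);
      /* Only the surrounding bytes changed; retry with the fresh word.  */
    }
}

int
main (void)
{
  union { uint32_t w; uint16_t h[2]; } x = { 0 };
  x.h[0] = 10;
  x.h[1] = 20;
  uint16_t prev = cas_u16 (&x.h[1], 20, 30);
  return !(prev == 20 && x.h[1] == 30);
}

The retry loop covers the window where unrelated bytes of the containing word change while the watched halfword still holds oldval; the real insn sequence gets the same effect from the lwarx/stwcx. reservation.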