author    bstarynk <bstarynk@138bc75d-0d04-0410-961f-82ee72b054a4>  2008-03-11 15:19:27 +0000
committer bstarynk <bstarynk@138bc75d-0d04-0410-961f-82ee72b054a4>  2008-03-11 15:19:27 +0000
commit    84e39114ef36f4f202028695936e3725b9a08474 (patch)
tree      1b76344540036a57ec23806ad0e4e22b62c916c3 /gcc/config
parent    f778423557464b07c76cbf36e2032cccb94c02bc (diff)
download  gcc-84e39114ef36f4f202028695936e3725b9a08474.tar.gz
2008-03-11  Basile Starynkevitch  <basile@starynkevitch.net>

	merged with trunk r133107

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/melt-branch@133113 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc/config')
-rw-r--r--  gcc/config/arm/arm-cores.def   |    3
-rw-r--r--  gcc/config/arm/arm-tune.md     |    2
-rw-r--r--  gcc/config/arm/arm.c           |  104
-rw-r--r--  gcc/config/arm/arm.h           |    8
-rw-r--r--  gcc/config/arm/arm.md          |   12
-rw-r--r--  gcc/config/arm/bpabi-v6m.S     |  280
-rw-r--r--  gcc/config/arm/bpabi.h         |   17
-rw-r--r--  gcc/config/arm/cortex-r4.md    |  288
-rw-r--r--  gcc/config/arm/elf.h           |   18
-rw-r--r--  gcc/config/arm/ieee754-df.S    |   18
-rw-r--r--  gcc/config/arm/ieee754-sf.S    |   16
-rw-r--r--  gcc/config/arm/lib1funcs.asm   |   21
-rw-r--r--  gcc/config/arm/libunwind.S     |  117
-rw-r--r--  gcc/config/arm/sfp-machine.h   |   96
-rw-r--r--  gcc/config/arm/t-arm-elf       |   12
-rw-r--r--  gcc/config/arm/t-arm-softfp    |   11
-rw-r--r--  gcc/config/arm/thumb2.md       |    8
-rw-r--r--  gcc/config/i386/gthr-win32.c   |    6
-rw-r--r--  gcc/config/i386/i386-modes.def |    5
-rw-r--r--  gcc/config/i386/i386.c         |  142
-rw-r--r--  gcc/config/i386/i386.h         |    5
-rw-r--r--  gcc/config/i386/i386.md        |    3
-rw-r--r--  gcc/config/i386/mmintrin.h     |   37
-rw-r--r--  gcc/config/i386/mmx.md         |  101
-rw-r--r--  gcc/config/i386/smmintrin.h    |   32
-rw-r--r--  gcc/config/i386/sse.md         |    4
-rw-r--r--  gcc/config/rs6000/altivec.md   |   26
-rw-r--r--  gcc/config/rs6000/rs6000.c     |   85
28 files changed, 1165 insertions(+), 312 deletions(-)
diff --git a/gcc/config/arm/arm-cores.def b/gcc/config/arm/arm-cores.def
index 748dcb074d1..cce3195a453 100644
--- a/gcc/config/arm/arm-cores.def
+++ b/gcc/config/arm/arm-cores.def
@@ -1,5 +1,5 @@
/* ARM CPU Cores
- Copyright (C) 2003, 2005, 2006, 2007 Free Software Foundation, Inc.
+ Copyright (C) 2003, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
Written by CodeSourcery, LLC
This file is part of GCC.
@@ -118,3 +118,4 @@ ARM_CORE("arm1156t2-s", arm1156t2s, 6T2, FL_LDSCHED, 9e)
ARM_CORE("cortex-a8", cortexa8, 7A, FL_LDSCHED, 9e)
ARM_CORE("cortex-r4", cortexr4, 7R, FL_LDSCHED, 9e)
ARM_CORE("cortex-m3", cortexm3, 7M, FL_LDSCHED, 9e)
+ARM_CORE("cortex-m1", cortexm1, 6M, FL_LDSCHED, 9e)
diff --git a/gcc/config/arm/arm-tune.md b/gcc/config/arm/arm-tune.md
index 5b4c46f07b7..d73382bc920 100644
--- a/gcc/config/arm/arm-tune.md
+++ b/gcc/config/arm/arm-tune.md
@@ -1,5 +1,5 @@
;; -*- buffer-read-only: t -*-
;; Generated automatically by gentune.sh from arm-cores.def
(define_attr "tune"
- "arm2,arm250,arm3,arm6,arm60,arm600,arm610,arm620,arm7,arm7d,arm7di,arm70,arm700,arm700i,arm710,arm720,arm710c,arm7100,arm7500,arm7500fe,arm7m,arm7dm,arm7dmi,arm8,arm810,strongarm,strongarm110,strongarm1100,strongarm1110,arm7tdmi,arm7tdmis,arm710t,arm720t,arm740t,arm9,arm9tdmi,arm920,arm920t,arm922t,arm940t,ep9312,arm10tdmi,arm1020t,arm9e,arm946es,arm966es,arm968es,arm10e,arm1020e,arm1022e,xscale,iwmmxt,arm926ejs,arm1026ejs,arm1136js,arm1136jfs,arm1176jzs,arm1176jzfs,mpcorenovfp,mpcore,arm1156t2s,cortexa8,cortexr4,cortexm3"
+ "arm2,arm250,arm3,arm6,arm60,arm600,arm610,arm620,arm7,arm7d,arm7di,arm70,arm700,arm700i,arm710,arm720,arm710c,arm7100,arm7500,arm7500fe,arm7m,arm7dm,arm7dmi,arm8,arm810,strongarm,strongarm110,strongarm1100,strongarm1110,arm7tdmi,arm7tdmis,arm710t,arm720t,arm740t,arm9,arm9tdmi,arm920,arm920t,arm922t,arm940t,ep9312,arm10tdmi,arm1020t,arm9e,arm946es,arm966es,arm968es,arm10e,arm1020e,arm1022e,xscale,iwmmxt,arm926ejs,arm1026ejs,arm1136js,arm1136jfs,arm1176jzs,arm1176jzfs,mpcorenovfp,mpcore,arm1156t2s,cortexa8,cortexr4,cortexm3,cortexm1"
(const (symbol_ref "arm_tune")))
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index c66b51ec328..bddb0e2771a 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -1,6 +1,6 @@
/* Output routines for GCC for ARM.
Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
- 2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
+ 2002, 2003, 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
and Martin Simmons (@harleqn.co.uk).
More major hacks by Richard Earnshaw (rearnsha@arm.com).
@@ -188,6 +188,7 @@ static void arm_target_help (void);
static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
static bool arm_cannot_copy_insn_p (rtx);
static bool arm_tls_symbol_p (rtx x);
+static int arm_issue_rate (void);
static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
@@ -358,6 +359,9 @@ static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
+#undef TARGET_SCHED_ISSUE_RATE
+#define TARGET_SCHED_ISSUE_RATE arm_issue_rate
+
#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE arm_mangle_type
@@ -460,6 +464,7 @@ static int thumb_call_reg_needed;
#define FL_FOR_ARCH6Z FL_FOR_ARCH6
#define FL_FOR_ARCH6ZK FL_FOR_ARCH6K
#define FL_FOR_ARCH6T2 (FL_FOR_ARCH6 | FL_THUMB2)
+#define FL_FOR_ARCH6M (FL_FOR_ARCH6 & ~FL_NOTM)
#define FL_FOR_ARCH7 (FL_FOR_ARCH6T2 &~ FL_NOTM)
#define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM)
#define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_DIV)
@@ -632,6 +637,7 @@ static const struct processors all_architectures[] =
{"armv6z", arm1176jzs, "6Z", FL_CO_PROC | FL_FOR_ARCH6Z, NULL},
{"armv6zk", arm1176jzs, "6ZK", FL_CO_PROC | FL_FOR_ARCH6ZK, NULL},
{"armv6t2", arm1156t2s, "6T2", FL_CO_PROC | FL_FOR_ARCH6T2, NULL},
+ {"armv6-m", cortexm1, "6M", FL_FOR_ARCH6M, NULL},
{"armv7", cortexa8, "7", FL_CO_PROC | FL_FOR_ARCH7, NULL},
{"armv7-a", cortexa8, "7A", FL_CO_PROC | FL_FOR_ARCH7A, NULL},
{"armv7-r", cortexr4, "7R", FL_CO_PROC | FL_FOR_ARCH7R, NULL},
@@ -17639,12 +17645,23 @@ arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
? 1 : 0);
if (mi_delta < 0)
mi_delta = - mi_delta;
- /* When generating 16-bit thumb code, thunks are entered in arm mode. */
+
if (TARGET_THUMB1)
{
int labelno = thunk_label++;
ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
- fputs ("\tldr\tr12, ", file);
+ /* Thunks are entered in arm mode when available. */
+ if (TARGET_THUMB1_ONLY)
+ {
+ /* push r3 so we can use it as a temporary. */
+ /* TODO: Omit this save if r3 is not used. */
+ fputs ("\tpush {r3}\n", file);
+ fputs ("\tldr\tr3, ", file);
+ }
+ else
+ {
+ fputs ("\tldr\tr12, ", file);
+ }
assemble_name (file, label);
fputc ('\n', file);
if (flag_pic)
@@ -17658,29 +17675,63 @@ arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
Note that we have "+ 1" because some versions of GNU ld
don't set the low bit of the result for R_ARM_REL32
- relocations against thumb function symbols. */
+ relocations against thumb function symbols.
+ On ARMv6M this is +4, not +8. */
ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
assemble_name (file, labelpc);
fputs (":\n", file);
- fputs ("\tadd\tr12, pc, r12\n", file);
+ if (TARGET_THUMB1_ONLY)
+ {
+ /* This is 2 insns after the start of the thunk, so we know it
+ is 4-byte aligned. */
+ fputs ("\tadd\tr3, pc, r3\n", file);
+ fputs ("\tmov r12, r3\n", file);
+ }
+ else
+ fputs ("\tadd\tr12, pc, r12\n", file);
}
+ else if (TARGET_THUMB1_ONLY)
+ fputs ("\tmov r12, r3\n", file);
}
- /* TODO: Use movw/movt for large constants when available. */
- while (mi_delta != 0)
+ if (TARGET_THUMB1_ONLY)
{
- if ((mi_delta & (3 << shift)) == 0)
- shift += 2;
- else
- {
- asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
- mi_op, this_regno, this_regno,
- mi_delta & (0xff << shift));
- mi_delta &= ~(0xff << shift);
- shift += 8;
- }
+ if (mi_delta > 255)
+ {
+ fputs ("\tldr\tr3, ", file);
+ assemble_name (file, label);
+ fputs ("+4\n", file);
+ asm_fprintf (file, "\t%s\t%r, %r, r3\n",
+ mi_op, this_regno, this_regno);
+ }
+ else if (mi_delta != 0)
+ {
+ asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
+ mi_op, this_regno, this_regno,
+ mi_delta);
+ }
+ }
+ else
+ {
+ /* TODO: Use movw/movt for large constants when available. */
+ while (mi_delta != 0)
+ {
+ if ((mi_delta & (3 << shift)) == 0)
+ shift += 2;
+ else
+ {
+ asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
+ mi_op, this_regno, this_regno,
+ mi_delta & (0xff << shift));
+ mi_delta &= ~(0xff << shift);
+ shift += 8;
+ }
+ }
}
if (TARGET_THUMB1)
{
+ if (TARGET_THUMB1_ONLY)
+ fputs ("\tpop\t{r3}\n", file);
+
fprintf (file, "\tbx\tr12\n");
ASM_OUTPUT_ALIGN (file, 2);
assemble_name (file, label);
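
Conceptually, the multiple-inheritance thunk emitted here only adjusts the incoming `this` pointer and tail-calls the real method; the new Thumb-1-only path has to do that without ARM mode, without flexible add-immediates, and with r3 as a scratch register. A C model of the thunk's job (sketch only; MI_DELTA stands in for the mi_delta computed above):

    /* C model of what the emitted thunk does (illustrative; the real
       thunk is the assembly sequence generated above).  */
    #define MI_DELTA 8              /* example delta; normally computed */
    struct obj;
    extern void real_method (struct obj *self);

    void
    thunk (struct obj *self)
    {
      real_method ((struct obj *) ((char *) self + MI_DELTA));
    }
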
@@ -17699,6 +17750,9 @@ arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
else
/* Output ".word .LTHUNKn". */
assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
+
+ if (TARGET_THUMB1_ONLY && mi_delta > 255)
+ assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
}
else
{
@@ -18660,6 +18714,22 @@ thumb2_output_casesi (rtx *operands)
}
}
+/* Most ARM cores are single issue, but some newer ones can dual issue.
+ The scheduler descriptions rely on this being correct. */
+static int
+arm_issue_rate (void)
+{
+ switch (arm_tune)
+ {
+ case cortexr4:
+ case cortexa8:
+ return 2;
+
+ default:
+ return 1;
+ }
+}
+
/* A table and a function to perform ARM-specific name mangling for
NEON vector types in order to conform to the AAPCS (see "Procedure
Call Standard for the ARM Architecture", Appendix A). To qualify
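
The new arm_issue_rate hook implements TARGET_SCHED_ISSUE_RATE, which bounds how many instructions the scheduler issues per modeled cycle; the Cortex-A8 and Cortex-R4 descriptions rely on the rate being 2. A toy model of the consumer side (hypothetical helper names, not GCC's actual scheduler code):

    /* Toy model: a cycle scheduler issues at most issue_rate ()
       ready instructions per cycle.  insn_t, issue_insn and
       issue_rate are hypothetical stand-ins.  */
    typedef struct insn insn_t;
    extern int issue_rate (void);    /* 2 on cortex-a8/cortex-r4, else 1 */
    extern void issue_insn (insn_t *);

    static void
    schedule_one_cycle (insn_t **ready, int n_ready)
    {
      int slots = issue_rate ();
      for (int i = 0; i < n_ready && slots > 0; i++, slots--)
        issue_insn (ready[i]);
    }
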
diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h
index fcb90ab3ca6..d93476ba77c 100644
--- a/gcc/config/arm/arm.h
+++ b/gcc/config/arm/arm.h
@@ -1,6 +1,7 @@
/* Definitions of target machine for GNU compiler, for ARM.
Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
- 2001, 2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
+ 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008
+ Free Software Foundation, Inc.
Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
and Martin Simmons (@harleqn.co.uk).
More major hacks by Richard Earnshaw (rearnsha@arm.com)
@@ -207,6 +208,8 @@ extern void (*arm_lang_output_object_attributes_hook)(void);
#define TARGET_32BIT (TARGET_ARM || arm_arch_thumb2)
/* 32-bit Thumb-2 code. */
#define TARGET_THUMB2 (TARGET_THUMB && arm_arch_thumb2)
+/* Thumb-1 only. */
+#define TARGET_THUMB1_ONLY (TARGET_THUMB1 && !arm_arch_notm)
/* The following two macros concern the ability to execute coprocessor
instructions for VFPv3 or NEON. TARGET_VFP3 is currently only ever
@@ -2397,7 +2400,8 @@ extern int making_const_table;
if (TARGET_THUMB) \
{ \
if (is_called_in_ARM_mode (DECL) \
- || (TARGET_THUMB1 && current_function_is_thunk)) \
+ || (TARGET_THUMB1 && !TARGET_THUMB1_ONLY \
+ && current_function_is_thunk)) \
fprintf (STREAM, "\t.code 32\n") ; \
else if (TARGET_THUMB1) \
fprintf (STREAM, "\t.code\t16\n\t.thumb_func\n") ; \
diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index 765b89bb84c..5b40449cee1 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -1,6 +1,7 @@
;;- Machine description for ARM for GNU compiler
;; Copyright 1991, 1993, 1994, 1995, 1996, 1996, 1997, 1998, 1999, 2000,
-;; 2001, 2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
+;; 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008
+;; Free Software Foundation, Inc.
;; Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
;; and Martin Simmons (@harleqn.co.uk).
;; More major hacks by Richard Earnshaw (rearnsha@arm.com).
@@ -93,9 +94,9 @@
(UNSPEC_TLS 20) ; A symbol that has been treated properly for TLS usage.
(UNSPEC_PIC_LABEL 21) ; A label used for PIC access that does not appear in the
; instruction stream.
- (UNSPEC_STACK_ALIGN 20) ; Doubleword aligned stack pointer. Used to
+ (UNSPEC_STACK_ALIGN 22) ; Doubleword aligned stack pointer. Used to
; generate correct unwind information.
- (UNSPEC_PIC_OFFSET 22) ; A symbolic 12-bit OFFSET that has been treated
+ (UNSPEC_PIC_OFFSET 23) ; A symbolic 12-bit OFFSET that has been treated
; correctly for PIC usage.
]
)
@@ -183,7 +184,7 @@
;; scheduling information.
(define_attr "insn"
- "mov,mvn,smulxy,smlaxy,smlalxy,smulwy,smlawx,mul,muls,mla,mlas,umull,umulls,umlal,umlals,smull,smulls,smlal,smlals,smlawy,smuad,smuadx,smlad,smladx,smusd,smusdx,smlsd,smlsdx,smmul,smmulr,smmla,umaal,smlald,smlsld,clz,mrs,msr,xtab,other"
+ "mov,mvn,smulxy,smlaxy,smlalxy,smulwy,smlawx,mul,muls,mla,mlas,umull,umulls,umlal,umlals,smull,smulls,smlal,smlals,smlawy,smuad,smuadx,smlad,smladx,smusd,smusdx,smlsd,smlsdx,smmul,smmulr,smmla,umaal,smlald,smlsld,clz,mrs,msr,xtab,sdiv,udiv,other"
(const_string "other"))
; TYPE attribute is used to detect floating point instructions which, if
@@ -332,7 +333,7 @@
(define_attr "generic_sched" "yes,no"
(const (if_then_else
- (eq_attr "tune" "arm926ejs,arm1020e,arm1026ejs,arm1136js,arm1136jfs,cortexa8")
+ (eq_attr "tune" "arm926ejs,arm1020e,arm1026ejs,arm1136js,arm1136jfs,cortexa8,cortexr4")
(const_string "no")
(const_string "yes"))))
@@ -349,6 +350,7 @@
(include "arm1026ejs.md")
(include "arm1136jfs.md")
(include "cortex-a8.md")
+(include "cortex-r4.md")
;;---------------------------------------------------------------------------
diff --git a/gcc/config/arm/bpabi-v6m.S b/gcc/config/arm/bpabi-v6m.S
new file mode 100644
index 00000000000..fa3b9c41478
--- /dev/null
+++ b/gcc/config/arm/bpabi-v6m.S
@@ -0,0 +1,280 @@
+/* Miscellaneous BPABI functions. ARMv6M implementation
+
+ Copyright (C) 2006, 2008 Free Software Foundation, Inc.
+ Contributed by CodeSourcery.
+
+ This file is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by the
+ Free Software Foundation; either version 2, or (at your option) any
+ later version.
+
+ In addition to the permissions in the GNU General Public License, the
+ Free Software Foundation gives you unlimited permission to link the
+ compiled version of this file into combinations with other programs,
+ and to distribute those combinations without any restriction coming
+ from the use of this file. (The General Public License restrictions
+ do apply in other respects; for example, they cover modification of
+ the file, and distribution when not linked into a combine
+ executable.)
+
+ This file is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; see the file COPYING. If not, write to
+ the Free Software Foundation, 51 Franklin Street, Fifth Floor,
+ Boston, MA 02110-1301, USA. */
+
+#ifdef __ARMEB__
+#define xxh r0
+#define xxl r1
+#define yyh r2
+#define yyl r3
+#else
+#define xxh r1
+#define xxl r0
+#define yyh r3
+#define yyl r2
+#endif
+
+#ifdef L_aeabi_lcmp
+
+FUNC_START aeabi_lcmp
+ cmp xxh, yyh
+ beq 1f
+ bgt 2f
+ mov r0, #1
+ neg r0, r0
+ RET
+2:
+ mov r0, #1
+ RET
+1:
+ sub r0, xxl, yyl
+ beq 1f
+ bhi 2f
+ mov r0, #1
+ neg r0, r0
+ RET
+2:
+ mov r0, #1
+1:
+ RET
+ FUNC_END aeabi_lcmp
+
+#endif /* L_aeabi_lcmp */
+
+#ifdef L_aeabi_ulcmp
+
+FUNC_START aeabi_ulcmp
+ cmp xxh, yyh
+ bne 1f
+ sub r0, xxl, yyl
+ beq 2f
+1:
+ bcs 1f
+ mov r0, #1
+ neg r0, r0
+ RET
+1:
+ mov r0, #1
+2:
+ RET
+ FUNC_END aeabi_ulcmp
+
+#endif /* L_aeabi_ulcmp */
+
+#ifdef L_aeabi_ldivmod
+
+FUNC_START aeabi_ldivmod
+ push {r0, r1}
+ mov r0, sp
+ push {r0, lr}
+ ldr r0, [sp, #8]
+ bl SYM(__gnu_ldivmod_helper)
+ ldr r3, [sp, #4]
+ mov lr, r3
+ add sp, sp, #8
+ pop {r2, r3}
+ RET
+ FUNC_END aeabi_ldivmod
+
+#endif /* L_aeabi_ldivmod */
+
+#ifdef L_aeabi_uldivmod
+
+FUNC_START aeabi_uldivmod
+ push {r0, r1}
+ mov r0, sp
+ push {r0, lr}
+ ldr r0, [sp, #8]
+ bl SYM(__gnu_uldivmod_helper)
+ ldr r3, [sp, #4]
+ mov lr, r3
+ add sp, sp, #8
+ pop {r2, r3}
+ RET
+ FUNC_END aeabi_uldivmod
+
+#endif /* L_aeabi_uldivmod */
+
+#ifdef L_arm_addsubsf3
+
+FUNC_START aeabi_frsub
+
+ push {r4, lr}
+ mov r4, #1
+ lsl r4, #31
+ eor r0, r0, r4
+ bl __aeabi_fadd
+ pop {r4, pc}
+
+ FUNC_END aeabi_frsub
+
+#endif /* L_arm_addsubsf3 */
+
+#ifdef L_arm_cmpsf2
+
+FUNC_START aeabi_cfrcmple
+
+ mov ip, r0
+ mov r0, r1
+ mov r1, ip
+ b 6f
+
+FUNC_START aeabi_cfcmpeq
+FUNC_ALIAS aeabi_cfcmple aeabi_cfcmpeq
+
+ @ The status-returning routines are required to preserve all
+ @ registers except ip, lr, and cpsr.
+6: push {r0, r1, r2, r3, r4, lr}
+ bl __lesf2
+ @ Set the Z flag correctly, and the C flag unconditionally.
+ cmp r0, #0
+ @ Clear the C flag if the return value was -1, indicating
+ @ that the first operand was smaller than the second.
+ bmi 1f
+ mov r1, #0
+ cmn r0, r1
+1:
+ pop {r0, r1, r2, r3, r4, pc}
+
+ FUNC_END aeabi_cfcmple
+ FUNC_END aeabi_cfcmpeq
+ FUNC_END aeabi_cfrcmple
+
+FUNC_START aeabi_fcmpeq
+
+ push {r4, lr}
+ bl __eqsf2
+ neg r0, r0
+ add r0, r0, #1
+ pop {r4, pc}
+
+ FUNC_END aeabi_fcmpeq
+
+.macro COMPARISON cond, helper, mode=sf2
+FUNC_START aeabi_fcmp\cond
+
+ push {r4, lr}
+ bl __\helper\mode
+ cmp r0, #0
+ b\cond 1f
+ mov r0, #0
+ pop {r4, pc}
+1:
+ mov r0, #1
+ pop {r4, pc}
+
+ FUNC_END aeabi_fcmp\cond
+.endm
+
+COMPARISON lt, le
+COMPARISON le, le
+COMPARISON gt, ge
+COMPARISON ge, ge
+
+#endif /* L_arm_cmpsf2 */
+
+#ifdef L_arm_addsubdf3
+
+FUNC_START aeabi_drsub
+
+ push {r4, lr}
+ mov r4, #1
+ lsl r4, #31
+ eor xxh, xxh, r4
+ bl __aeabi_dadd
+ pop {r4, pc}
+
+ FUNC_END aeabi_drsub
+
+#endif /* L_arm_addsubdf3 */
+
+#ifdef L_arm_cmpdf2
+
+FUNC_START aeabi_cdrcmple
+
+ mov ip, r0
+ mov r0, r2
+ mov r2, ip
+ mov ip, r1
+ mov r1, r3
+ mov r3, ip
+ b 6f
+
+FUNC_START aeabi_cdcmpeq
+FUNC_ALIAS aeabi_cdcmple aeabi_cdcmpeq
+
+ @ The status-returning routines are required to preserve all
+ @ registers except ip, lr, and cpsr.
+6: push {r0, r1, r2, r3, r4, lr}
+ bl __ledf2
+ @ Set the Z flag correctly, and the C flag unconditionally.
+ cmp r0, #0
+ @ Clear the C flag if the return value was -1, indicating
+ @ that the first operand was smaller than the second.
+ bmi 1f
+ mov r1, #0
+ cmn r0, r1
+1:
+ pop {r0, r1, r2, r3, r4, pc}
+
+ FUNC_END aeabi_cdcmple
+ FUNC_END aeabi_cdcmpeq
+ FUNC_END aeabi_cdrcmple
+
+FUNC_START aeabi_dcmpeq
+
+ push {r4, lr}
+ bl __eqdf2
+ neg r0, r0
+ add r0, r0, #1
+ pop {r4, pc}
+
+ FUNC_END aeabi_dcmpeq
+
+.macro COMPARISON cond, helper, mode=df2
+FUNC_START aeabi_dcmp\cond
+
+ push {r4, lr}
+ bl __\helper\mode
+ cmp r0, #0
+ b\cond 1f
+ mov r0, #0
+ pop {r4, pc}
+1:
+ mov r0, #1
+ pop {r4, pc}
+
+ FUNC_END aeabi_dcmp\cond
+.endm
+
+COMPARISON lt, le
+COMPARISON le, le
+COMPARISON gt, ge
+COMPARISON ge, ge
+
+#endif /* L_arm_cmpdf2 */
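
For reference, the new v6-M helpers above have simple C semantics. A sketch (the -1/0/+1 results match the code above, though the AEABI only requires negative/zero/positive; the __gnu_ldivmod_helper signature shown is the one assumed by the call sequence):

    /* Reference semantics of __aeabi_lcmp (sketch).  */
    int
    aeabi_lcmp_ref (long long x, long long y)
    {
      return x < y ? -1 : x > y ? 1 : 0;
    }

    /* __aeabi_ldivmod returns the quotient in r0:r1 and the remainder
       in r2:r3; the wrapper above forwards to __gnu_ldivmod_helper,
       whose C shape is assumed to be:  */
    long long
    gnu_ldivmod_helper_ref (long long a, long long b, long long *rem)
    {
      long long q = a / b;
      *rem = a - q * b;
      return q;
    }
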
diff --git a/gcc/config/arm/bpabi.h b/gcc/config/arm/bpabi.h
index a67f6498cdd..0f3b24faaf3 100644
--- a/gcc/config/arm/bpabi.h
+++ b/gcc/config/arm/bpabi.h
@@ -1,5 +1,5 @@
/* Configuration file for ARM BPABI targets.
- Copyright (C) 2004, 2005, 2007
+ Copyright (C) 2004, 2005, 2007, 2008
Free Software Foundation, Inc.
Contributed by CodeSourcery, LLC
@@ -99,6 +99,21 @@
#define DECLARE_LIBRARY_RENAMES RENAME_LIBRARY (floatdisf, l2f)
#endif
+/* These renames are needed on ARMv6M. Other targets get them from
+ assembly routines. */
+#ifdef L_fixunsdfsi
+#define DECLARE_LIBRARY_RENAMES RENAME_LIBRARY (fixunsdfsi, d2uiz)
+#endif
+#ifdef L_fixunssfsi
+#define DECLARE_LIBRARY_RENAMES RENAME_LIBRARY (fixunssfsi, f2uiz)
+#endif
+#ifdef L_floatundidf
+#define DECLARE_LIBRARY_RENAMES RENAME_LIBRARY (floatundidf, ul2d)
+#endif
+#ifdef L_floatundisf
+#define DECLARE_LIBRARY_RENAMES RENAME_LIBRARY (floatundisf, ul2f)
+#endif
+
/* The BPABI requires that we always use an out-of-line implementation
of RTTI comparison, even if the target supports weak symbols,
because the same object file might be used on a target that does
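
The practical effect of the new renames: on an ARMv6-M soft-float multilib the libgcc2 C fallbacks are emitted under their AEABI names, so ordinary conversions resolve to them. For example (sketch):

    /* On an ARMv6-M soft-float target this conversion compiles to a
       call to __aeabi_d2uiz, i.e. the renamed fixunsdfsi routine.  */
    unsigned int
    to_uint (double d)
    {
      return (unsigned int) d;
    }
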
diff --git a/gcc/config/arm/cortex-r4.md b/gcc/config/arm/cortex-r4.md
new file mode 100644
index 00000000000..d912f205994
--- /dev/null
+++ b/gcc/config/arm/cortex-r4.md
@@ -0,0 +1,288 @@
+;; ARM Cortex-R4 scheduling description.
+;; Copyright (C) 2007, 2008 Free Software Foundation, Inc.
+;; Contributed by CodeSourcery.
+
+;; This file is part of GCC.
+
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+;; License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING. If not, write to
+;; the Free Software Foundation, 51 Franklin Street, Fifth Floor,
+;; Boston, MA 02110-1301, USA.
+
+(define_automaton "cortex_r4")
+
+;; We approximate the dual-issue constraints of this core using four
+;; "issue units" and a reservation matrix as follows. The numbers indicate
+;; the instruction groups' preferences in order. Multiple entries for
+;; the same numbered preference indicate units that must be reserved
+;; together.
+;;
+;; Issue unit: A B C ALU
+;;
+;; ALU w/o reg shift 1st 2nd 1st and 2nd
+;; ALU w/ reg shift 1st 2nd 2nd 1st and 2nd
+;; Moves 1st 2nd 2nd
+;; Multiplication 1st 1st
+;; Division 1st 1st
+;; Load/store single 1st 1st
+;; Other load/store 1st 1st
+;; Branches 1st
+
+(define_cpu_unit "cortex_r4_issue_a" "cortex_r4")
+(define_cpu_unit "cortex_r4_issue_b" "cortex_r4")
+(define_cpu_unit "cortex_r4_issue_c" "cortex_r4")
+(define_cpu_unit "cortex_r4_issue_alu" "cortex_r4")
+
+(define_reservation "cortex_r4_alu"
+ "(cortex_r4_issue_a+cortex_r4_issue_alu)|\
+ (cortex_r4_issue_b+cortex_r4_issue_alu)")
+(define_reservation "cortex_r4_alu_shift_reg"
+ "(cortex_r4_issue_a+cortex_r4_issue_alu)|\
+ (cortex_r4_issue_b+cortex_r4_issue_c+\
+ cortex_r4_issue_alu)")
+(define_reservation "cortex_r4_mov"
+ "cortex_r4_issue_a|(cortex_r4_issue_b+\
+ cortex_r4_issue_alu)")
+(define_reservation "cortex_r4_mul" "cortex_r4_issue_a+cortex_r4_issue_alu")
+(define_reservation "cortex_r4_mul_2"
+ "(cortex_r4_issue_a+cortex_r4_issue_alu)*2")
+;; Division instructions execute out-of-order with respect to the
+;; rest of the pipeline and only require reservations on their first and
+;; final cycles.
+(define_reservation "cortex_r4_div_9"
+ "cortex_r4_issue_a+cortex_r4_issue_alu,\
+ nothing*7,\
+ cortex_r4_issue_a+cortex_r4_issue_alu")
+(define_reservation "cortex_r4_div_10"
+ "cortex_r4_issue_a+cortex_r4_issue_alu,\
+ nothing*8,\
+ cortex_r4_issue_a+cortex_r4_issue_alu")
+(define_reservation "cortex_r4_load_store"
+ "cortex_r4_issue_a+cortex_r4_issue_c")
+(define_reservation "cortex_r4_load_store_2"
+ "(cortex_r4_issue_a+cortex_r4_issue_b)*2")
+(define_reservation "cortex_r4_branch" "cortex_r4_issue_b")
+
+;; We assume that all instructions are unconditional.
+
+;; Data processing instructions. Moves without shifts are kept separate
+;; for the purposes of the dual-issue constraints above.
+(define_insn_reservation "cortex_r4_alu" 2
+ (and (eq_attr "tune" "cortexr4")
+ (and (eq_attr "type" "alu")
+ (not (eq_attr "insn" "mov"))))
+ "cortex_r4_alu")
+
+(define_insn_reservation "cortex_r4_mov" 2
+ (and (eq_attr "tune" "cortexr4")
+ (and (eq_attr "type" "alu")
+ (eq_attr "insn" "mov")))
+ "cortex_r4_mov")
+
+(define_insn_reservation "cortex_r4_alu_shift" 2
+ (and (eq_attr "tune" "cortexr4")
+ (eq_attr "type" "alu_shift"))
+ "cortex_r4_alu")
+
+(define_insn_reservation "cortex_r4_alu_shift_reg" 2
+ (and (eq_attr "tune" "cortexr4")
+ (eq_attr "type" "alu_shift_reg"))
+ "cortex_r4_alu_shift_reg")
+
+;; An ALU instruction followed by an ALU instruction with no early dep.
+(define_bypass 1 "cortex_r4_alu,cortex_r4_alu_shift,cortex_r4_alu_shift_reg,\
+ cortex_r4_mov"
+ "cortex_r4_alu")
+(define_bypass 1 "cortex_r4_alu,cortex_r4_alu_shift,cortex_r4_alu_shift_reg,\
+ cortex_r4_mov"
+ "cortex_r4_alu_shift"
+ "arm_no_early_alu_shift_dep")
+(define_bypass 1 "cortex_r4_alu,cortex_r4_alu_shift,cortex_r4_alu_shift_reg,\
+ cortex_r4_mov"
+ "cortex_r4_alu_shift_reg"
+ "arm_no_early_alu_shift_value_dep")
+
+;; In terms of availabilities, a consumer mov could theoretically be
+;; issued together with a producer ALU instruction, without stalls.
+;; In practice this cannot happen because mov;add (in that order) is not
+;; eligible for dual issue and furthermore dual issue is not permitted
+;; when a dependency is involved. We therefore note it as latency one.
+;; A mov followed by another of the same is also latency one.
+(define_bypass 1 "cortex_r4_alu,cortex_r4_alu_shift,cortex_r4_alu_shift_reg,\
+ cortex_r4_mov"
+ "cortex_r4_mov")
+
+;; qadd, qdadd, qsub and qdsub are not currently emitted, and neither are
+;; media data processing instructions nor sad instructions.
+
+;; Multiplication instructions.
+
+(define_insn_reservation "cortex_r4_mul_4" 4
+ (and (eq_attr "tune" "cortexr4")
+ (eq_attr "insn" "mul,smmul"))
+ "cortex_r4_mul_2")
+
+(define_insn_reservation "cortex_r4_mul_3" 3
+ (and (eq_attr "tune" "cortexr4")
+ (eq_attr "insn" "smulxy,smulwy,smuad,smusd"))
+ "cortex_r4_mul")
+
+(define_insn_reservation "cortex_r4_mla_4" 4
+ (and (eq_attr "tune" "cortexr4")
+ (eq_attr "insn" "mla,smmla"))
+ "cortex_r4_mul_2")
+
+(define_insn_reservation "cortex_r4_mla_3" 3
+ (and (eq_attr "tune" "cortexr4")
+ (eq_attr "insn" "smlaxy,smlawy,smlad,smlsd"))
+ "cortex_r4_mul")
+
+(define_insn_reservation "cortex_r4_smlald" 3
+ (and (eq_attr "tune" "cortexr4")
+ (eq_attr "insn" "smlald,smlsld"))
+ "cortex_r4_mul")
+
+(define_insn_reservation "cortex_r4_mull" 4
+ (and (eq_attr "tune" "cortexr4")
+ (eq_attr "insn" "smull,umull,umlal,umaal"))
+ "cortex_r4_mul_2")
+
+;; A multiply or an MLA with a single-register result, followed by an
+;; MLA with an accumulator dependency, has its result forwarded.
+(define_bypass 2 "cortex_r4_mul_3,cortex_r4_mla_3"
+ "cortex_r4_mla_3,cortex_r4_mla_4"
+ "arm_mac_accumulator_is_mul_result")
+
+(define_bypass 3 "cortex_r4_mul_4,cortex_r4_mla_4"
+ "cortex_r4_mla_3,cortex_r4_mla_4"
+ "arm_mac_accumulator_is_mul_result")
+
+;; A multiply followed by an ALU instruction needing the multiply
+;; result only at ALU has lower latency than one needing it at Shift.
+(define_bypass 2 "cortex_r4_mul_3,cortex_r4_mla_3,cortex_r4_smlald"
+ "cortex_r4_alu")
+(define_bypass 2 "cortex_r4_mul_3,cortex_r4_mla_3,cortex_r4_smlald"
+ "cortex_r4_alu_shift"
+ "arm_no_early_alu_shift_dep")
+(define_bypass 2 "cortex_r4_mul_3,cortex_r4_mla_3,cortex_r4_smlald"
+ "cortex_r4_alu_shift_reg"
+ "arm_no_early_alu_shift_value_dep")
+(define_bypass 3 "cortex_r4_mul_4,cortex_r4_mla_4,cortex_r4_mull"
+ "cortex_r4_alu")
+(define_bypass 3 "cortex_r4_mul_4,cortex_r4_mla_4,cortex_r4_mull"
+ "cortex_r4_alu_shift"
+ "arm_no_early_alu_shift_dep")
+(define_bypass 3 "cortex_r4_mul_4,cortex_r4_mla_4,cortex_r4_mull"
+ "cortex_r4_alu_shift_reg"
+ "arm_no_early_alu_shift_value_dep")
+
+;; A multiply followed by a mov has one cycle lower latency again.
+(define_bypass 1 "cortex_r4_mul_3,cortex_r4_mla_3,cortex_r4_smlald"
+ "cortex_r4_mov")
+(define_bypass 2 "cortex_r4_mul_4,cortex_r4_mla_4,cortex_r4_mull"
+ "cortex_r4_mov")
+
+;; We guess that division of A/B using sdiv or udiv, on average,
+;; is performed with B having ten more leading zeros than A.
+;; This gives a latency of nine for udiv and ten for sdiv.
+(define_insn_reservation "cortex_r4_udiv" 9
+ (and (eq_attr "tune" "cortexr4")
+ (eq_attr "insn" "udiv"))
+ "cortex_r4_div_9")
+
+(define_insn_reservation "cortex_r4_sdiv" 10
+ (and (eq_attr "tune" "cortexr4")
+ (eq_attr "insn" "sdiv"))
+ "cortex_r4_div_10")
+
+;; Branches. We assume correct prediction.
+
+(define_insn_reservation "cortex_r4_branch" 0
+ (and (eq_attr "tune" "cortexr4")
+ (eq_attr "type" "branch"))
+ "cortex_r4_branch")
+
+;; Call latencies are not predictable. A semi-arbitrary very large
+;; number is used as "positive infinity" so that everything should be
+;; finished by the time of return.
+(define_insn_reservation "cortex_r4_call" 32
+ (and (eq_attr "tune" "cortexr4")
+ (eq_attr "type" "call"))
+ "nothing")
+
+;; Status register access instructions are not currently emitted.
+
+;; Load instructions.
+;; We do not model the "addr_md_3cycle" cases and assume that
+;; accesses following are correctly aligned.
+
+(define_insn_reservation "cortex_r4_load_1_2" 3
+ (and (eq_attr "tune" "cortexr4")
+ (eq_attr "type" "load1,load2"))
+ "cortex_r4_load_store")
+
+(define_insn_reservation "cortex_r4_load_3_4" 4
+ (and (eq_attr "tune" "cortexr4")
+ (eq_attr "type" "load3,load4"))
+ "cortex_r4_load_store_2")
+
+;; If a producing load is followed by an instruction consuming only
+;; as a Normal Reg, there is one fewer cycle of latency.
+
+(define_bypass 2 "cortex_r4_load_1_2"
+ "cortex_r4_alu")
+(define_bypass 2 "cortex_r4_load_1_2"
+ "cortex_r4_alu_shift"
+ "arm_no_early_alu_shift_dep")
+(define_bypass 2 "cortex_r4_load_1_2"
+ "cortex_r4_alu_shift_reg"
+ "arm_no_early_alu_shift_value_dep")
+
+(define_bypass 3 "cortex_r4_load_3_4"
+ "cortex_r4_alu")
+(define_bypass 3 "cortex_r4_load_3_4"
+ "cortex_r4_alu_shift"
+ "arm_no_early_alu_shift_dep")
+(define_bypass 3 "cortex_r4_load_3_4"
+ "cortex_r4_alu_shift_reg"
+ "arm_no_early_alu_shift_value_dep")
+
+;; If a producing load is followed by an instruction consuming only
+;; as a Late Reg, there are two fewer cycles of latency. Such consumer
+;; instructions are moves and stores.
+
+(define_bypass 1 "cortex_r4_load_1_2"
+ "cortex_r4_mov,cortex_r4_store_1_2,cortex_r4_store_3_4")
+(define_bypass 2 "cortex_r4_load_3_4"
+ "cortex_r4_mov,cortex_r4_store_1_2,cortex_r4_store_3_4")
+
+;; If a producer's result is required as the base or offset of a load,
+;; there is an extra cycle latency.
+
+(define_bypass 3 "cortex_r4_alu,cortex_r4_mov,cortex_r4_alu_shift,\
+ cortex_r4_alu_shift_reg"
+ "cortex_r4_load_1_2,cortex_r4_load_3_4")
+
+(define_bypass 4 "cortex_r4_mul_3,cortex_r4_mla_3,cortex_r4_smlald"
+ "cortex_r4_load_1_2,cortex_r4_load_3_4")
+
+(define_bypass 5 "cortex_r4_mul_4,cortex_r4_mla_4,cortex_r4_mull"
+ "cortex_r4_load_1_2,cortex_r4_load_3_4")
+
+;; Store instructions.
+
+(define_insn_reservation "cortex_r4_store_1_2" 0
+ (and (eq_attr "tune" "cortexr4")
+ (eq_attr "type" "store1,store2"))
+ "cortex_r4_load_store")
+
+(define_insn_reservation "cortex_r4_store_3_4" 0
+ (and (eq_attr "tune" "cortexr4")
+ (eq_attr "type" "store3,store4"))
+ "cortex_r4_load_store_2")
+
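
The issue-unit matrix at the top of cortex-r4.md reads as a small resource-allocation problem: each instruction class offers one or more alternative unit sets, and two instructions dual-issue only if compatible alternatives fit in the same cycle. A toy bitmask model of that idea (illustrative only; GCC actually compiles the description into a DFA):

    /* Toy model of the four Cortex-R4 issue units (not GCC's DFA).  */
    enum { UNIT_A = 1, UNIT_B = 2, UNIT_C = 4, UNIT_ALU = 8 };

    /* ALU without register shift: (A+ALU)|(B+ALU), as in cortex_r4_alu. */
    static const unsigned alu_alts[] = { UNIT_A | UNIT_ALU,
                                         UNIT_B | UNIT_ALU };

    /* Try to issue: claim the first alternative whose units are free.  */
    static int
    try_issue (unsigned *taken, const unsigned *alts, int n_alts)
    {
      for (int i = 0; i < n_alts; i++)
        if ((*taken & alts[i]) == 0)
          {
            *taken |= alts[i];
            return 1;
          }
      return 0;
    }
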
diff --git a/gcc/config/arm/elf.h b/gcc/config/arm/elf.h
index 65bd00fdc3c..37c366d5ac8 100644
--- a/gcc/config/arm/elf.h
+++ b/gcc/config/arm/elf.h
@@ -1,7 +1,7 @@
/* Definitions of target machine for GNU compiler.
For ARM with ELF obj format.
- Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2004, 2005, 2007
- Free Software Foundation, Inc.
+ Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2004, 2005, 2007,
+ 2008 Free Software Foundation, Inc.
Contributed by Philip Blundell <philb@gnu.org> and
Catherine Moore <clm@cygnus.com>
@@ -145,3 +145,17 @@
} \
while (0)
+/* Horrible hack: We want to prevent some libgcc routines being included
+ for some multilibs. */
+#ifndef __ARM_ARCH_6M__
+#undef L_fixdfsi
+#undef L_fixunsdfsi
+#undef L_truncdfsf2
+#undef L_fixsfsi
+#undef L_fixunssfsi
+#undef L_floatdidf
+#undef L_floatdisf
+#undef L_floatundidf
+#undef L_floatundisf
+#endif
+
diff --git a/gcc/config/arm/ieee754-df.S b/gcc/config/arm/ieee754-df.S
index ebf7e58dabb..a175fa6bb74 100644
--- a/gcc/config/arm/ieee754-df.S
+++ b/gcc/config/arm/ieee754-df.S
@@ -1,6 +1,6 @@
/* ieee754-df.S double-precision floating point support for ARM
- Copyright (C) 2003, 2004, 2005, 2007 Free Software Foundation, Inc.
+ Copyright (C) 2003, 2004, 2005, 2007, 2008 Free Software Foundation, Inc.
Contributed by Nicolas Pitre (nico@cam.org)
This file is free software; you can redistribute it and/or modify it
@@ -56,7 +56,7 @@
#endif
-#ifdef L_negdf2
+#ifdef L_arm_negdf2
ARM_FUNC_START negdf2
ARM_FUNC_ALIAS aeabi_dneg negdf2
@@ -70,7 +70,7 @@ ARM_FUNC_ALIAS aeabi_dneg negdf2
#endif
-#ifdef L_addsubdf3
+#ifdef L_arm_addsubdf3
ARM_FUNC_START aeabi_drsub
@@ -603,7 +603,7 @@ LSYM(f0_ret):
#endif /* L_addsubdf3 */
-#ifdef L_muldivdf3
+#ifdef L_arm_muldivdf3
ARM_FUNC_START muldf3
ARM_FUNC_ALIAS aeabi_dmul muldf3
@@ -1103,7 +1103,7 @@ LSYM(Ldv_s):
#endif /* L_muldivdf3 */
-#ifdef L_cmpdf2
+#ifdef L_arm_cmpdf2
@ Note: only r0 (return value) and ip are clobbered here.
@@ -1271,7 +1271,7 @@ ARM_FUNC_START aeabi_dcmpgt
#endif /* L_cmpdf2 */
-#ifdef L_unorddf2
+#ifdef L_arm_unorddf2
ARM_FUNC_START unorddf2
ARM_FUNC_ALIAS aeabi_dcmpun unorddf2
@@ -1297,7 +1297,7 @@ ARM_FUNC_ALIAS aeabi_dcmpun unorddf2
#endif /* L_unorddf2 */
-#ifdef L_fixdfsi
+#ifdef L_arm_fixdfsi
ARM_FUNC_START fixdfsi
ARM_FUNC_ALIAS aeabi_d2iz fixdfsi
@@ -1339,7 +1339,7 @@ ARM_FUNC_ALIAS aeabi_d2iz fixdfsi
#endif /* L_fixdfsi */
-#ifdef L_fixunsdfsi
+#ifdef L_arm_fixunsdfsi
ARM_FUNC_START fixunsdfsi
ARM_FUNC_ALIAS aeabi_d2uiz fixunsdfsi
@@ -1377,7 +1377,7 @@ ARM_FUNC_ALIAS aeabi_d2uiz fixunsdfsi
#endif /* L_fixunsdfsi */
-#ifdef L_truncdfsf2
+#ifdef L_arm_truncdfsf2
ARM_FUNC_START truncdfsf2
ARM_FUNC_ALIAS aeabi_d2f truncdfsf2
diff --git a/gcc/config/arm/ieee754-sf.S b/gcc/config/arm/ieee754-sf.S
index 405487a3855..2da156cd44d 100644
--- a/gcc/config/arm/ieee754-sf.S
+++ b/gcc/config/arm/ieee754-sf.S
@@ -1,6 +1,6 @@
/* ieee754-sf.S single-precision floating point support for ARM
- Copyright (C) 2003, 2004, 2005, 2007 Free Software Foundation, Inc.
+ Copyright (C) 2003, 2004, 2005, 2007, 2008 Free Software Foundation, Inc.
Contributed by Nicolas Pitre (nico@cam.org)
This file is free software; you can redistribute it and/or modify it
@@ -38,7 +38,7 @@
* if necessary without impacting performances.
*/
-#ifdef L_negsf2
+#ifdef L_arm_negsf2
ARM_FUNC_START negsf2
ARM_FUNC_ALIAS aeabi_fneg negsf2
@@ -51,7 +51,7 @@ ARM_FUNC_ALIAS aeabi_fneg negsf2
#endif
-#ifdef L_addsubsf3
+#ifdef L_arm_addsubsf3
ARM_FUNC_START aeabi_frsub
@@ -448,7 +448,7 @@ LSYM(f0_ret):
#endif /* L_addsubsf3 */
-#ifdef L_muldivsf3
+#ifdef L_arm_muldivsf3
ARM_FUNC_START mulsf3
ARM_FUNC_ALIAS aeabi_fmul mulsf3
@@ -795,7 +795,7 @@ LSYM(Ldv_s):
#endif /* L_muldivsf3 */
-#ifdef L_cmpsf2
+#ifdef L_arm_cmpsf2
@ The return value in r0 is
@
@@ -958,7 +958,7 @@ ARM_FUNC_START aeabi_fcmpgt
#endif /* L_cmpsf2 */
-#ifdef L_unordsf2
+#ifdef L_arm_unordsf2
ARM_FUNC_START unordsf2
ARM_FUNC_ALIAS aeabi_fcmpun unordsf2
@@ -983,7 +983,7 @@ ARM_FUNC_ALIAS aeabi_fcmpun unordsf2
#endif /* L_unordsf2 */
-#ifdef L_fixsfsi
+#ifdef L_arm_fixsfsi
ARM_FUNC_START fixsfsi
ARM_FUNC_ALIAS aeabi_f2iz fixsfsi
@@ -1025,7 +1025,7 @@ ARM_FUNC_ALIAS aeabi_f2iz fixsfsi
#endif /* L_fixsfsi */
-#ifdef L_fixunssfsi
+#ifdef L_arm_fixunssfsi
ARM_FUNC_START fixunssfsi
ARM_FUNC_ALIAS aeabi_f2uiz fixunssfsi
diff --git a/gcc/config/arm/lib1funcs.asm b/gcc/config/arm/lib1funcs.asm
index 0c6e440e598..2fc66be80d5 100644
--- a/gcc/config/arm/lib1funcs.asm
+++ b/gcc/config/arm/lib1funcs.asm
@@ -1,7 +1,7 @@
@ libgcc routines for ARM cpu.
@ Division routines, written by Richard Earnshaw, (rearnsha@armltd.co.uk)
-/* Copyright 1995, 1996, 1998, 1999, 2000, 2003, 2004, 2005, 2007
+/* Copyright 1995, 1996, 1998, 1999, 2000, 2003, 2004, 2005, 2007, 2008
Free Software Foundation, Inc.
This file is free software; you can redistribute it and/or modify it
@@ -94,7 +94,8 @@ Boston, MA 02110-1301, USA. */
#if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \
|| defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) \
- || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__)
+ || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) \
+ || defined(__ARM_ARCH_6M__)
# define __ARM_ARCH__ 6
#endif
@@ -367,6 +368,9 @@ _L__\name:
#else /* !(__INTERWORKING_STUBS__ || __thumb2__) */
+#ifdef __ARM_ARCH_6M__
+#define EQUIV .thumb_set
+#else
.macro ARM_FUNC_START name
.text
.globl SYM (__\name)
@@ -379,6 +383,7 @@ SYM (__\name):
.macro ARM_CALL name
bl __\name
.endm
+#endif
#endif
@@ -391,6 +396,7 @@ SYM (__\name):
#endif
.endm
+#ifndef __ARM_ARCH_6M__
.macro ARM_FUNC_ALIAS new old
.globl SYM (__\new)
EQUIV SYM (__\new), SYM (__\old)
@@ -398,6 +404,7 @@ SYM (__\name):
.set SYM (_L__\new), SYM (_L__\old)
#endif
.endm
+#endif
#ifdef __thumb__
/* Register aliases. */
@@ -1256,8 +1263,8 @@ LSYM(Lover12):
#endif /* L_call_via_rX */
/* Don't bother with the old interworking routines for Thumb-2. */
-/* ??? Maybe only omit these on v7m. */
-#ifndef __thumb2__
+/* ??? Maybe only omit these on "m" variants. */
+#if !defined(__thumb2__) && !defined(__ARM_ARCH_6M__)
#if defined L_interwork_call_via_rX
@@ -1387,7 +1394,11 @@ LSYM(Lchange_\register):
#endif /* Arch supports thumb. */
#ifndef __symbian__
+#ifndef __ARM_ARCH_6M__
#include "ieee754-df.S"
#include "ieee754-sf.S"
#include "bpabi.S"
-#endif /* __symbian__ */
+#else /* __ARM_ARCH_6M__ */
+#include "bpabi-v6m.S"
+#endif /* __ARM_ARCH_6M__ */
+#endif /* !__symbian__ */
diff --git a/gcc/config/arm/libunwind.S b/gcc/config/arm/libunwind.S
index 0732e9f0af9..dd958548387 100644
--- a/gcc/config/arm/libunwind.S
+++ b/gcc/config/arm/libunwind.S
@@ -1,5 +1,5 @@
/* Support functions for the unwinder.
- Copyright (C) 2003, 2004, 2005, 2007 Free Software Foundation, Inc.
+ Copyright (C) 2003, 2004, 2005, 2007, 2008 Free Software Foundation, Inc.
Contributed by Paul Brook
This file is free software; you can redistribute it and/or modify it
@@ -53,6 +53,119 @@
#endif
#endif
+#ifdef __ARM_ARCH_6M__
+
+/* r0 points to a 16-word block. Upload these values to the actual core
+ state. */
+FUNC_START restore_core_regs
+ mov r1, r0
+ add r1, r1, #52
+ ldmia r1!, {r3, r4, r5}
+ sub r3, r3, #4
+ mov ip, r3
+ str r5, [r3]
+ mov lr, r4
+ /* Restore r8-r11. */
+ mov r1, r0
+ add r1, r1, #32
+ ldmia r1!, {r2, r3, r4, r5}
+ mov r8, r2
+ mov r9, r3
+ mov sl, r4
+ mov fp, r5
+ mov r1, r0
+ add r1, r1, #8
+ ldmia r1!, {r2, r3, r4, r5, r6, r7}
+ ldr r1, [r0, #4]
+ ldr r0, [r0]
+ mov sp, ip
+ pop {pc}
+ FUNC_END restore_core_regs
+ UNPREFIX restore_core_regs
+
+/* ARMv6M does not have coprocessors, so these should never be used. */
+FUNC_START gnu_Unwind_Restore_VFP
+ RET
+
+/* Store VFP registers d0-d15 to the address in r0. */
+FUNC_START gnu_Unwind_Save_VFP
+ RET
+
+/* Load VFP registers d0-d15 from the address in r0.
+ Use this to load from FSTMD format. */
+FUNC_START gnu_Unwind_Restore_VFP_D
+ RET
+
+/* Store VFP registers d0-d15 to the address in r0.
+ Use this to store in FLDMD format. */
+FUNC_START gnu_Unwind_Save_VFP_D
+ RET
+
+/* Load VFP registers d16-d31 from the address in r0.
+ Use this to load from FSTMD (=VSTM) format. Needs VFPv3. */
+FUNC_START gnu_Unwind_Restore_VFP_D_16_to_31
+ RET
+
+/* Store VFP registers d16-d31 to the address in r0.
+ Use this to store in FLDMD (=VLDM) format. Needs VFPv3. */
+FUNC_START gnu_Unwind_Save_VFP_D_16_to_31
+ RET
+
+FUNC_START gnu_Unwind_Restore_WMMXD
+ RET
+
+FUNC_START gnu_Unwind_Save_WMMXD
+ RET
+
+FUNC_START gnu_Unwind_Restore_WMMXC
+ RET
+
+FUNC_START gnu_Unwind_Save_WMMXC
+ RET
+
+.macro UNWIND_WRAPPER name nargs
+ FUNC_START \name
+ /* Create a phase2_vrs structure. */
+ /* Save r0 in the PC slot so we can use it as a scratch register. */
+ push {r0}
+ add r0, sp, #4
+ push {r0, lr} /* Push original SP and LR. */
+ /* Make space for r8-r12. */
+ sub sp, sp, #20
+ /* Save low registers. */
+ push {r0, r1, r2, r3, r4, r5, r6, r7}
+ /* Save high registers. */
+ add r0, sp, #32
+ mov r1, r8
+ mov r2, r9
+ mov r3, sl
+ mov r4, fp
+ mov r5, ip
+ stmia r0!, {r1, r2, r3, r4, r5}
+ /* Restore original low register values. */
+ add r0, sp, #4
+ ldmia r0!, {r1, r2, r3, r4, r5}
+ /* Restore original r0. */
+ ldr r0, [sp, #60]
+ str r0, [sp]
+ /* Demand-save flags, plus an extra word for alignment. */
+ mov r3, #0
+ push {r2, r3}
+ /* Point r1 at the block. Pass r[0..nargs) unchanged. */
+ add r\nargs, sp, #4
+
+ bl SYM (__gnu\name)
+
+ ldr r3, [sp, #64]
+ add sp, sp, #72
+ bx r3
+
+ FUNC_END \name
+ UNPREFIX \name
+.endm
+
+#else /* !__ARM_ARCH_6M__ */
+
/* r0 points to a 16-word block. Upload these values to the actual core
state. */
ARM_FUNC_START restore_core_regs
@@ -233,6 +346,8 @@ ARM_FUNC_START gnu_Unwind_Save_WMMXC
UNPREFIX \name
.endm
+#endif /* !__ARM_ARCH_6M__ */
+
UNWIND_WRAPPER _Unwind_RaiseException 1
UNWIND_WRAPPER _Unwind_Resume 1
UNWIND_WRAPPER _Unwind_Resume_or_Rethrow 1
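
Both versions of restore_core_regs consume the same 16-word block; the v6-M variant merely stages values through low registers because direct loads into r8-r15 are unavailable. The offsets it uses (#32 for r8, #52 for sp/lr/pc) imply the obvious layout (sketch; mirrors the unwinder's core register block):

    /* Assumed layout of the 16-word block passed in r0.  */
    struct core_regs_sketch
    {
      unsigned int r[16];   /* r[0]..r[12], r[13]=sp, r[14]=lr, r[15]=pc */
    };
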
diff --git a/gcc/config/arm/sfp-machine.h b/gcc/config/arm/sfp-machine.h
new file mode 100644
index 00000000000..bc75737d73d
--- /dev/null
+++ b/gcc/config/arm/sfp-machine.h
@@ -0,0 +1,96 @@
+#define _FP_W_TYPE_SIZE 32
+#define _FP_W_TYPE unsigned long
+#define _FP_WS_TYPE signed long
+#define _FP_I_TYPE long
+
+#define _FP_MUL_MEAT_S(R,X,Y) \
+ _FP_MUL_MEAT_1_wide(_FP_WFRACBITS_S,R,X,Y,umul_ppmm)
+#define _FP_MUL_MEAT_D(R,X,Y) \
+ _FP_MUL_MEAT_2_wide(_FP_WFRACBITS_D,R,X,Y,umul_ppmm)
+#define _FP_MUL_MEAT_Q(R,X,Y) \
+ _FP_MUL_MEAT_4_wide(_FP_WFRACBITS_Q,R,X,Y,umul_ppmm)
+
+#define _FP_DIV_MEAT_S(R,X,Y) _FP_DIV_MEAT_1_loop(S,R,X,Y)
+#define _FP_DIV_MEAT_D(R,X,Y) _FP_DIV_MEAT_2_udiv(D,R,X,Y)
+#define _FP_DIV_MEAT_Q(R,X,Y) _FP_DIV_MEAT_4_udiv(Q,R,X,Y)
+
+#define _FP_NANFRAC_S ((_FP_QNANBIT_S << 1) - 1)
+#define _FP_NANFRAC_D ((_FP_QNANBIT_D << 1) - 1), -1
+#define _FP_NANFRAC_Q ((_FP_QNANBIT_Q << 1) - 1), -1, -1, -1
+#define _FP_NANSIGN_S 0
+#define _FP_NANSIGN_D 0
+#define _FP_NANSIGN_Q 0
+
+#define _FP_KEEPNANFRACP 1
+
+/* Someone please check this. */
+#define _FP_CHOOSENAN(fs, wc, R, X, Y, OP) \
+ do { \
+ if ((_FP_FRAC_HIGH_RAW_##fs(X) & _FP_QNANBIT_##fs) \
+ && !(_FP_FRAC_HIGH_RAW_##fs(Y) & _FP_QNANBIT_##fs)) \
+ { \
+ R##_s = Y##_s; \
+ _FP_FRAC_COPY_##wc(R,Y); \
+ } \
+ else \
+ { \
+ R##_s = X##_s; \
+ _FP_FRAC_COPY_##wc(R,X); \
+ } \
+ R##_c = FP_CLS_NAN; \
+ } while (0)
+
+#define __LITTLE_ENDIAN 1234
+#define __BIG_ENDIAN 4321
+
+#if defined __ARMEB__
+# define __BYTE_ORDER __BIG_ENDIAN
+#else
+# define __BYTE_ORDER __LITTLE_ENDIAN
+#endif
+
+
+/* Define ALIASNAME as a strong alias for NAME. */
+# define strong_alias(name, aliasname) _strong_alias(name, aliasname)
+# define _strong_alias(name, aliasname) \
+ extern __typeof (name) aliasname __attribute__ ((alias (#name)));
+
+#ifdef __ARM_EABI__
+/* Rename functions to their EABI names. */
+/* The comparison functions need wrappers for EABI semantics, so
+ leave them unmolested. */
+#define __negsf2 __aeabi_fneg
+#define __subsf3 __aeabi_fsub
+#define __addsf3 __aeabi_fadd
+#define __floatunsisf __aeabi_ui2f
+#define __floatsisf __aeabi_i2f
+#define __floatundisf __aeabi_ul2f
+#define __floatdisf __aeabi_l2f
+#define __mulsf3 __aeabi_fmul
+#define __divsf3 __aeabi_fdiv
+#define __unordsf2 __aeabi_fcmpun
+#define __fixsfsi __aeabi_f2iz
+#define __fixunssfsi __aeabi_f2uiz
+#define __fixsfdi __aeabi_f2lz
+#define __fixunssfdi __aeabi_f2ulz
+#define __floatdisf __aeabi_l2f
+
+#define __negdf2 __aeabi_dneg
+#define __subdf3 __aeabi_dsub
+#define __adddf3 __aeabi_dadd
+#define __floatunsidf __aeabi_ui2d
+#define __floatsidf __aeabi_i2d
+#define __extendsfdf2 __aeabi_f2d
+#define __truncdfsf2 __aeabi_d2f
+#define __floatundidf __aeabi_ul2d
+#define __floatdidf __aeabi_l2d
+#define __muldf3 __aeabi_dmul
+#define __divdf3 __aeabi_ddiv
+#define __unorddf2 __aeabi_dcmpun
+#define __fixdfsi __aeabi_d2iz
+#define __fixunsdfsi __aeabi_d2uiz
+#define __fixdfdi __aeabi_d2lz
+#define __fixunsdfdi __aeabi_d2ulz
+#define __floatdidf __aeabi_l2d
+
+#endif /* __ARM_EABI__ */
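
A C rendering of the _FP_CHOOSENAN policy defined above may make the (tentative, per its comment) rule easier to audit: when X is a quiet NaN and Y is not, the result takes Y's sign and fraction; otherwise it takes X's, and it is always classed as a NaN. Sketch with hypothetical types:

    /* C model of the _FP_CHOOSENAN macro above (sketch only).  */
    typedef struct { int sign; unsigned long frac; int qnan_bit_set; }
      nan_sketch;

    static nan_sketch
    choose_nan (nan_sketch x, nan_sketch y)
    {
      /* Prefer Y when X is quiet and Y is signaling, else keep X;
         the macro then classes the result as FP_CLS_NAN.  */
      return (x.qnan_bit_set && !y.qnan_bit_set) ? y : x;
    }
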
diff --git a/gcc/config/arm/t-arm-elf b/gcc/config/arm/t-arm-elf
index b423bbb3597..31ba396b433 100644
--- a/gcc/config/arm/t-arm-elf
+++ b/gcc/config/arm/t-arm-elf
@@ -1,10 +1,16 @@
LIB1ASMSRC = arm/lib1funcs.asm
+# For most CPUs we have an assembly soft-float implementation.
+# However this is not true for ARMv6M; there we want to use the soft-fp C
+# implementation.  The soft-fp code is only built for ARMv6M, so the list
+# below pulls in the asm implementations for the other CPUs.
LIB1ASMFUNCS = _udivsi3 _divsi3 _umodsi3 _modsi3 _dvmd_tls _bb_init_func \
_call_via_rX _interwork_call_via_rX \
_lshrdi3 _ashrdi3 _ashldi3 \
- _negdf2 _addsubdf3 _muldivdf3 _cmpdf2 _unorddf2 _fixdfsi _fixunsdfsi \
- _truncdfsf2 _negsf2 _addsubsf3 _muldivsf3 _cmpsf2 _unordsf2 \
- _fixsfsi _fixunssfsi _floatdidf _floatdisf _floatundidf _floatundisf
+ _arm_negdf2 _arm_addsubdf3 _arm_muldivdf3 _arm_cmpdf2 _arm_unorddf2 \
+ _arm_fixdfsi _arm_fixunsdfsi \
+ _arm_truncdfsf2 _arm_negsf2 _arm_addsubsf3 _arm_muldivsf3 \
+ _arm_cmpsf2 _arm_unordsf2 _arm_fixsfsi _arm_fixunssfsi \
+ _arm_floatdidf _arm_floatdisf _arm_floatundidf _arm_floatundisf
MULTILIB_OPTIONS = marm/mthumb
MULTILIB_DIRNAMES = arm thumb
diff --git a/gcc/config/arm/t-arm-softfp b/gcc/config/arm/t-arm-softfp
new file mode 100644
index 00000000000..4a97747b195
--- /dev/null
+++ b/gcc/config/arm/t-arm-softfp
@@ -0,0 +1,11 @@
+softfp_float_modes := sf df
+softfp_int_modes := si di
+softfp_extensions := sfdf
+softfp_truncations := dfsf
+softfp_machine_header := arm/sfp-machine.h
+softfp_exclude_libgcc2 := y
+softfp_wrap_start := '\#ifdef __ARM_ARCH_6M__'
+softfp_wrap_end := '\#endif'
+
+# softfp seems to be missing a whole bunch of prototypes.
+TARGET_LIBGCC2_CFLAGS += -Wno-missing-prototypes
diff --git a/gcc/config/arm/thumb2.md b/gcc/config/arm/thumb2.md
index 0efe31f0f6d..ba45c88c2cb 100644
--- a/gcc/config/arm/thumb2.md
+++ b/gcc/config/arm/thumb2.md
@@ -1,5 +1,5 @@
;; ARM Thumb-2 Machine Description
-;; Copyright (C) 2007 Free Software Foundation, Inc.
+;; Copyright (C) 2007, 2008 Free Software Foundation, Inc.
;; Written by CodeSourcery, LLC.
;;
;; This file is part of GCC.
@@ -1131,7 +1131,8 @@
(match_operand:SI 2 "s_register_operand" "r")))]
"TARGET_THUMB2 && arm_arch_hwdiv"
"sdiv%?\t%0, %1, %2"
- [(set_attr "predicable" "yes")]
+ [(set_attr "predicable" "yes")
+ (set_attr "insn" "sdiv")]
)
(define_insn "udivsi3"
@@ -1140,7 +1141,8 @@
(match_operand:SI 2 "s_register_operand" "r")))]
"TARGET_THUMB2 && arm_arch_hwdiv"
"udiv%?\t%0, %1, %2"
- [(set_attr "predicable" "yes")]
+ [(set_attr "predicable" "yes")
+ (set_attr "insn" "udiv")]
)
(define_insn "*thumb2_cbz"
diff --git a/gcc/config/i386/gthr-win32.c b/gcc/config/i386/gthr-win32.c
index c87a771ed8f..3810fc0f26c 100644
--- a/gcc/config/i386/gthr-win32.c
+++ b/gcc/config/i386/gthr-win32.c
@@ -151,6 +151,12 @@ __gthr_win32_mutex_init_function (__gthread_mutex_t *mutex)
mutex->sema = CreateSemaphore (NULL, 0, 65535, NULL);
}
+void
+__gthr_win32_mutex_destroy (__gthread_mutex_t *mutex)
+{
+ CloseHandle ((HANDLE) mutex->sema);
+}
+
int
__gthr_win32_mutex_lock (__gthread_mutex_t *mutex)
{
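
The new destroy hook completes the mutex lifecycle for the Win32 thread model: init creates a semaphore, and destroy now closes that handle. A sketch of the flow through this file's entry points (the __gthread_* wrappers in gthr-win32.h forward to these):

    /* Sketch of the Win32 gthread mutex lifecycle (simplified).  */
    static __gthread_mutex_t m;

    static void
    demo (void)
    {
      __gthr_win32_mutex_init_function (&m); /* CreateSemaphore */
      __gthr_win32_mutex_lock (&m);
      __gthr_win32_mutex_unlock (&m);
      __gthr_win32_mutex_destroy (&m);       /* CloseHandle on the semaphore */
    }
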
diff --git a/gcc/config/i386/i386-modes.def b/gcc/config/i386/i386-modes.def
index 105d3872b79..a23e8203ff3 100644
--- a/gcc/config/i386/i386-modes.def
+++ b/gcc/config/i386/i386-modes.def
@@ -17,6 +17,10 @@ You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
+/* In 32-bit mode, DImode values live in pairs of 32-bit registers,
+   so only 4-byte alignment is needed.  */
+ADJUST_ALIGNMENT (DI, (TARGET_64BIT || TARGET_ALIGN_DOUBLE) ? 8 : 4);
+
/* The x86_64 ABI specifies both XF and TF modes.
XFmode is __float80 is IEEE extended; TFmode is __float128
is IEEE quad. */
@@ -75,6 +79,7 @@ VECTOR_MODES (INT, 8); /* V8QI V4HI V2SI */
VECTOR_MODES (INT, 16); /* V16QI V8HI V4SI V2DI */
VECTOR_MODES (FLOAT, 8); /* V4HF V2SF */
VECTOR_MODES (FLOAT, 16); /* V8HF V4SF V2DF */
+VECTOR_MODE (INT, DI, 1); /* V1DI */
VECTOR_MODE (INT, QI, 2); /* V2QI */
VECTOR_MODE (INT, DI, 4); /* V4DI */
VECTOR_MODE (INT, SI, 8); /* V8SI */
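
A concrete consequence of the new ADJUST_ALIGNMENT line: in 32-bit mode without -malign-double, 64-bit integers only need 4-byte alignment, matching the i386 SysV ABI's struct layout. For example:

    /* With 4-byte DImode alignment (-m32, no -malign-double),
       offsetof (struct s, ll) is 4; with 8-byte alignment (x86-64 or
       -malign-double) it is 8.  */
    #include <stddef.h>
    struct s { char c; long long ll; };
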
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 5a4456d912b..74b2be23d9b 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -2700,6 +2700,18 @@ override_options (void)
target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
}
+ /* If stack probes are required, the space used for large function
+ arguments on the stack must also be probed, so enable
+ -maccumulate-outgoing-args so this happens in the prologue. */
+ if (TARGET_STACK_PROBE
+ && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
+ {
+ if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
+ warning (0, "stack probing requires -maccumulate-outgoing-args "
+ "for correctness");
+ target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
+ }
+
/* For sane SSE instruction set generation we need fcomi instruction.
It is safe to enable all CMOVE instructions. */
if (TARGET_SSE)
@@ -3826,7 +3838,7 @@ classify_argument (enum machine_mode mode, const_tree type,
}
/* for V1xx modes, just use the base mode */
- if (VECTOR_MODE_P (mode)
+ if (VECTOR_MODE_P (mode) && mode != V1DImode
&& GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
mode = GET_MODE_INNER (mode);
@@ -3898,6 +3910,7 @@ classify_argument (enum machine_mode mode, const_tree type,
classes[0] = X86_64_SSE_CLASS;
classes[1] = X86_64_SSEUP_CLASS;
return 2;
+ case V1DImode:
case V2SFmode:
case V2SImode:
case V4HImode:
@@ -4199,6 +4212,7 @@ function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
case V4HImode:
case V2SImode:
case V2SFmode:
+ case V1DImode:
if (!type || !AGGREGATE_TYPE_P (type))
{
cum->mmx_words += words;
@@ -4362,6 +4376,7 @@ function_arg_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
case V4HImode:
case V2SImode:
case V2SFmode:
+ case V1DImode:
if (!type || !AGGREGATE_TYPE_P (type))
{
if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
@@ -16774,7 +16789,8 @@ ia32_multipass_dfa_lookahead (void)
int
ix86_constant_alignment (tree exp, int align)
{
- if (TREE_CODE (exp) == REAL_CST)
+ if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
+ || TREE_CODE (exp) == INTEGER_CST)
{
if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
return 64;
@@ -17943,11 +17959,11 @@ static const struct builtin_description bdesc_2arg[] =
{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, 0 },
{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, 0 },
{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, 0 },
- { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, 0 },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, 0 },
{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, 0 },
{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, 0 },
{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, 0 },
- { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, 0 },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, 0 },
{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, 0 },
{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, 0 },
@@ -17998,25 +18014,6 @@ static const struct builtin_description bdesc_2arg[] =
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, UNKNOWN, 0 },
{ OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, UNKNOWN, 0 },
- { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, UNKNOWN, 0 },
- { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, UNKNOWN, 0 },
- { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, UNKNOWN, 0 },
- { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, UNKNOWN, 0 },
- { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, UNKNOWN, 0 },
- { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, UNKNOWN, 0 },
-
- { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, UNKNOWN, 0 },
- { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, UNKNOWN, 0 },
- { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, UNKNOWN, 0 },
- { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, UNKNOWN, 0 },
- { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, UNKNOWN, 0 },
- { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, UNKNOWN, 0 },
-
- { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, UNKNOWN, 0 },
- { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, UNKNOWN, 0 },
- { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, UNKNOWN, 0 },
- { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, UNKNOWN, 0 },
-
{ OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, UNKNOWN, 0 },
{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, UNKNOWN, 0 },
@@ -18128,17 +18125,6 @@ static const struct builtin_description bdesc_2arg[] =
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, UNKNOWN, 0 },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, UNKNOWN, 0 },
- { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, UNKNOWN, 0 },
- { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, UNKNOWN, 0 },
- { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, UNKNOWN, 0 },
-
- { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, UNKNOWN, 0 },
- { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, UNKNOWN, 0 },
- { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, UNKNOWN, 0 },
-
- { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, UNKNOWN, 0 },
- { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, UNKNOWN, 0 },
-
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, UNKNOWN, 0 },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, UNKNOWN, 0 },
@@ -18200,6 +18186,7 @@ static const struct builtin_description bdesc_2arg[] =
static const struct builtin_description bdesc_1arg[] =
{
+ /* SSE */
{ OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, UNKNOWN, 0 },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, UNKNOWN, 0 },
@@ -18216,6 +18203,7 @@ static const struct builtin_description bdesc_1arg[] =
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, UNKNOWN, 0 },
{ OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, 0 },
+ /* SSE2 */
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, UNKNOWN, 0 },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, UNKNOWN, 0 },
@@ -18573,6 +18561,8 @@ ix86_init_mmx_sse_builtins (void)
tree V16QI_type_node = build_vector_type_for_mode (char_type_node, V16QImode);
tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
+ tree V1DI_type_node
+ = build_vector_type_for_mode (long_long_integer_type_node, V1DImode);
tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
tree V2DI_type_node
= build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
@@ -18637,14 +18627,13 @@ ix86_init_mmx_sse_builtins (void)
tree v4hi_ftype_v4hi_int
= build_function_type_list (V4HI_type_node,
V4HI_type_node, integer_type_node, NULL_TREE);
- tree v4hi_ftype_v4hi_di
- = build_function_type_list (V4HI_type_node,
- V4HI_type_node, long_long_unsigned_type_node,
- NULL_TREE);
- tree v2si_ftype_v2si_di
+ tree v2si_ftype_v2si_int
= build_function_type_list (V2SI_type_node,
- V2SI_type_node, long_long_unsigned_type_node,
- NULL_TREE);
+ V2SI_type_node, integer_type_node, NULL_TREE);
+ tree v1di_ftype_v1di_int
+ = build_function_type_list (V1DI_type_node,
+ V1DI_type_node, integer_type_node, NULL_TREE);
+
tree void_ftype_void
= build_function_type (void_type_node, void_list_node);
tree void_ftype_unsigned
@@ -18711,10 +18700,9 @@ ix86_init_mmx_sse_builtins (void)
tree v2si_ftype_v2si_v2si
= build_function_type_list (V2SI_type_node,
V2SI_type_node, V2SI_type_node, NULL_TREE);
- tree di_ftype_di_di
- = build_function_type_list (long_long_unsigned_type_node,
- long_long_unsigned_type_node,
- long_long_unsigned_type_node, NULL_TREE);
+ tree v1di_ftype_v1di_v1di
+ = build_function_type_list (V1DI_type_node,
+ V1DI_type_node, V1DI_type_node, NULL_TREE);
tree di_ftype_di_di_int
= build_function_type_list (long_long_unsigned_type_node,
@@ -19170,8 +19158,8 @@ ix86_init_mmx_sse_builtins (void)
case V2SImode:
type = v2si_ftype_v2si_v2si;
break;
- case DImode:
- type = di_ftype_di_di;
+ case V1DImode:
+ type = v1di_ftype_v1di_v1di;
break;
default:
@@ -19263,16 +19251,25 @@ ix86_init_mmx_sse_builtins (void)
/* Add the remaining MMX insns with somewhat more complicated types. */
def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
- def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
- def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
- def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
- def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
- def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
- def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
-
- def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
- def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
+ def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psllwi", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSLLWI);
+ def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_pslldi", v2si_ftype_v2si_int, IX86_BUILTIN_PSLLDI);
+ def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psllqi", v1di_ftype_v1di_int, IX86_BUILTIN_PSLLQI);
+ def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PSLLW);
+ def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_v2si, IX86_BUILTIN_PSLLD);
+ def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psllq", v1di_ftype_v1di_v1di, IX86_BUILTIN_PSLLQ);
+
+ def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrlwi", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSRLWI);
+ def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrldi", v2si_ftype_v2si_int, IX86_BUILTIN_PSRLDI);
+ def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrlqi", v1di_ftype_v1di_int, IX86_BUILTIN_PSRLQI);
+ def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PSRLW);
+ def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_v2si, IX86_BUILTIN_PSRLD);
+ def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrlq", v1di_ftype_v1di_v1di, IX86_BUILTIN_PSRLQ);
+
+ def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrawi", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSRAWI);
+ def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psradi", v2si_ftype_v2si_int, IX86_BUILTIN_PSRADI);
+ def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PSRAW);
+ def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_v2si, IX86_BUILTIN_PSRAD);
def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
@@ -20817,6 +20814,39 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
emit_insn (pat);
return target;
+ case IX86_BUILTIN_PSLLW:
+ case IX86_BUILTIN_PSLLWI:
+ icode = CODE_FOR_mmx_ashlv4hi3;
+ goto do_pshift;
+ case IX86_BUILTIN_PSLLD:
+ case IX86_BUILTIN_PSLLDI:
+ icode = CODE_FOR_mmx_ashlv2si3;
+ goto do_pshift;
+ case IX86_BUILTIN_PSLLQ:
+ case IX86_BUILTIN_PSLLQI:
+ icode = CODE_FOR_mmx_ashlv1di3;
+ goto do_pshift;
+ case IX86_BUILTIN_PSRAW:
+ case IX86_BUILTIN_PSRAWI:
+ icode = CODE_FOR_mmx_ashrv4hi3;
+ goto do_pshift;
+ case IX86_BUILTIN_PSRAD:
+ case IX86_BUILTIN_PSRADI:
+ icode = CODE_FOR_mmx_ashrv2si3;
+ goto do_pshift;
+ case IX86_BUILTIN_PSRLW:
+ case IX86_BUILTIN_PSRLWI:
+ icode = CODE_FOR_mmx_lshrv4hi3;
+ goto do_pshift;
+ case IX86_BUILTIN_PSRLD:
+ case IX86_BUILTIN_PSRLDI:
+ icode = CODE_FOR_mmx_lshrv2si3;
+ goto do_pshift;
+ case IX86_BUILTIN_PSRLQ:
+ case IX86_BUILTIN_PSRLQI:
+ icode = CODE_FOR_mmx_lshrv1di3;
+ goto do_pshift;
+
case IX86_BUILTIN_PSLLW128:
case IX86_BUILTIN_PSLLWI128:
icode = CODE_FOR_ashlv8hi3;
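For reference, a minimal sketch of how the retyped MMX shift builtins are meant to be called after this change (signatures as registered in ix86_init_mmx_sse_builtins above; the function names are illustrative and -mmmx is assumed):

typedef short     __v4hi __attribute__ ((__vector_size__ (8)));
typedef long long __v1di __attribute__ ((__vector_size__ (8)));

/* Immediate-count form: the count is now a plain int, not a DI value.  */
__v4hi
shl_halves_by_2 (__v4hi v)
{
  return __builtin_ia32_psllwi (v, 2);
}

/* Register-count form: the count is itself a 64-bit MMX vector.  */
__v1di
shl_quad (__v1di v, __v1di n)
{
  return __builtin_ia32_psllq (v, n);
}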
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index f2429846691..98cb72ac921 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -1120,8 +1120,9 @@ do { \
((MODE) == V2SFmode || (MODE) == SFmode)
#define VALID_MMX_REG_MODE(MODE) \
- ((MODE) == DImode || (MODE) == V8QImode || (MODE) == V4HImode \
- || (MODE) == V2SImode || (MODE) == SImode)
+ ((MODE) == V1DImode || (MODE) == DImode \
+ || (MODE) == V2SImode || (MODE) == SImode \
+ || (MODE) == V4HImode || (MODE) == V8QImode)
/* ??? No autovectorization into MMX or 3DNOW until we can reliably
place emms and femms instructions. */
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 92a37280f5f..eb942d60d9c 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -95,7 +95,6 @@
(UNSPEC_RCP 45)
(UNSPEC_RSQRT 46)
(UNSPEC_SFENCE 47)
- (UNSPEC_NOP 48) ; prevents combiner cleverness
(UNSPEC_PFRCP 49)
(UNSPEC_PFRCPIT1 40)
(UNSPEC_PFRCPIT2 41)
@@ -19873,7 +19872,7 @@
(set (reg:SI SP_REG) (minus:SI (reg:SI SP_REG) (match_dup 0)))
(clobber (reg:CC FLAGS_REG))]
"!TARGET_64BIT && TARGET_STACK_PROBE"
- "call\t__alloca"
+ "call\t___chkstk"
[(set_attr "type" "multi")
(set_attr "length" "5")])
diff --git a/gcc/config/i386/mmintrin.h b/gcc/config/i386/mmintrin.h
index a04109be616..94800ad688e 100644
--- a/gcc/config/i386/mmintrin.h
+++ b/gcc/config/i386/mmintrin.h
@@ -42,6 +42,7 @@ typedef int __m64 __attribute__ ((__vector_size__ (8), __may_alias__));
typedef int __v2si __attribute__ ((__vector_size__ (8)));
typedef short __v4hi __attribute__ ((__vector_size__ (8)));
typedef char __v8qi __attribute__ ((__vector_size__ (8)));
+typedef long long __v1di __attribute__ ((__vector_size__ (8)));
/* Empty the multimedia state. */
static __inline void __attribute__((__always_inline__, __artificial__))
@@ -309,7 +310,7 @@ _m_paddd (__m64 __m1, __m64 __m2)
static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_add_si64 (__m64 __m1, __m64 __m2)
{
- return (__m64) __builtin_ia32_paddq ((long long)__m1, (long long)__m2);
+ return (__m64) __builtin_ia32_paddq ((__v1di)__m1, (__v1di)__m2);
}
#endif
@@ -413,7 +414,7 @@ _m_psubd (__m64 __m1, __m64 __m2)
static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_sub_si64 (__m64 __m1, __m64 __m2)
{
- return (__m64) __builtin_ia32_psubq ((long long)__m1, (long long)__m2);
+ return (__m64) __builtin_ia32_psubq ((__v1di)__m1, (__v1di)__m2);
}
#endif
@@ -520,7 +521,7 @@ _m_pmullw (__m64 __m1, __m64 __m2)
static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_sll_pi16 (__m64 __m, __m64 __count)
{
- return (__m64) __builtin_ia32_psllw ((__v4hi)__m, (long long)__count);
+ return (__m64) __builtin_ia32_psllw ((__v4hi)__m, (__v4hi)__count);
}
static __inline __m64 __attribute__((__always_inline__, __artificial__))
@@ -532,7 +533,7 @@ _m_psllw (__m64 __m, __m64 __count)
static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_slli_pi16 (__m64 __m, int __count)
{
- return (__m64) __builtin_ia32_psllw ((__v4hi)__m, __count);
+ return (__m64) __builtin_ia32_psllwi ((__v4hi)__m, __count);
}
static __inline __m64 __attribute__((__always_inline__, __artificial__))
@@ -545,7 +546,7 @@ _m_psllwi (__m64 __m, int __count)
static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_sll_pi32 (__m64 __m, __m64 __count)
{
- return (__m64) __builtin_ia32_pslld ((__v2si)__m, (long long)__count);
+ return (__m64) __builtin_ia32_pslld ((__v2si)__m, (__v2si)__count);
}
static __inline __m64 __attribute__((__always_inline__, __artificial__))
@@ -557,7 +558,7 @@ _m_pslld (__m64 __m, __m64 __count)
static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_slli_pi32 (__m64 __m, int __count)
{
- return (__m64) __builtin_ia32_pslld ((__v2si)__m, __count);
+ return (__m64) __builtin_ia32_pslldi ((__v2si)__m, __count);
}
static __inline __m64 __attribute__((__always_inline__, __artificial__))
@@ -570,7 +571,7 @@ _m_pslldi (__m64 __m, int __count)
static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_sll_si64 (__m64 __m, __m64 __count)
{
- return (__m64) __builtin_ia32_psllq ((long long)__m, (long long)__count);
+ return (__m64) __builtin_ia32_psllq ((__v1di)__m, (__v1di)__count);
}
static __inline __m64 __attribute__((__always_inline__, __artificial__))
@@ -582,7 +583,7 @@ _m_psllq (__m64 __m, __m64 __count)
static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_slli_si64 (__m64 __m, int __count)
{
- return (__m64) __builtin_ia32_psllq ((long long)__m, (long long)__count);
+ return (__m64) __builtin_ia32_psllqi ((__v1di)__m, __count);
}
static __inline __m64 __attribute__((__always_inline__, __artificial__))
@@ -595,7 +596,7 @@ _m_psllqi (__m64 __m, int __count)
static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_sra_pi16 (__m64 __m, __m64 __count)
{
- return (__m64) __builtin_ia32_psraw ((__v4hi)__m, (long long)__count);
+ return (__m64) __builtin_ia32_psraw ((__v4hi)__m, (__v4hi)__count);
}
static __inline __m64 __attribute__((__always_inline__, __artificial__))
@@ -607,7 +608,7 @@ _m_psraw (__m64 __m, __m64 __count)
static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_srai_pi16 (__m64 __m, int __count)
{
- return (__m64) __builtin_ia32_psraw ((__v4hi)__m, __count);
+ return (__m64) __builtin_ia32_psrawi ((__v4hi)__m, __count);
}
static __inline __m64 __attribute__((__always_inline__, __artificial__))
@@ -620,7 +621,7 @@ _m_psrawi (__m64 __m, int __count)
static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_sra_pi32 (__m64 __m, __m64 __count)
{
- return (__m64) __builtin_ia32_psrad ((__v2si)__m, (long long)__count);
+ return (__m64) __builtin_ia32_psrad ((__v2si)__m, (__v2si)__count);
}
static __inline __m64 __attribute__((__always_inline__, __artificial__))
@@ -632,7 +633,7 @@ _m_psrad (__m64 __m, __m64 __count)
static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_srai_pi32 (__m64 __m, int __count)
{
- return (__m64) __builtin_ia32_psrad ((__v2si)__m, __count);
+ return (__m64) __builtin_ia32_psradi ((__v2si)__m, __count);
}
static __inline __m64 __attribute__((__always_inline__, __artificial__))
@@ -645,7 +646,7 @@ _m_psradi (__m64 __m, int __count)
static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_srl_pi16 (__m64 __m, __m64 __count)
{
- return (__m64) __builtin_ia32_psrlw ((__v4hi)__m, (long long)__count);
+ return (__m64) __builtin_ia32_psrlw ((__v4hi)__m, (__v4hi)__count);
}
static __inline __m64 __attribute__((__always_inline__, __artificial__))
@@ -657,7 +658,7 @@ _m_psrlw (__m64 __m, __m64 __count)
static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_srli_pi16 (__m64 __m, int __count)
{
- return (__m64) __builtin_ia32_psrlw ((__v4hi)__m, __count);
+ return (__m64) __builtin_ia32_psrlwi ((__v4hi)__m, __count);
}
static __inline __m64 __attribute__((__always_inline__, __artificial__))
@@ -670,7 +671,7 @@ _m_psrlwi (__m64 __m, int __count)
static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_srl_pi32 (__m64 __m, __m64 __count)
{
- return (__m64) __builtin_ia32_psrld ((__v2si)__m, (long long)__count);
+ return (__m64) __builtin_ia32_psrld ((__v2si)__m, (__v2si)__count);
}
static __inline __m64 __attribute__((__always_inline__, __artificial__))
@@ -682,7 +683,7 @@ _m_psrld (__m64 __m, __m64 __count)
static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_srli_pi32 (__m64 __m, int __count)
{
- return (__m64) __builtin_ia32_psrld ((__v2si)__m, __count);
+ return (__m64) __builtin_ia32_psrldi ((__v2si)__m, __count);
}
static __inline __m64 __attribute__((__always_inline__, __artificial__))
@@ -695,7 +696,7 @@ _m_psrldi (__m64 __m, int __count)
static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_srl_si64 (__m64 __m, __m64 __count)
{
- return (__m64) __builtin_ia32_psrlq ((long long)__m, (long long)__count);
+ return (__m64) __builtin_ia32_psrlq ((__v1di)__m, (__v1di)__count);
}
static __inline __m64 __attribute__((__always_inline__, __artificial__))
@@ -707,7 +708,7 @@ _m_psrlq (__m64 __m, __m64 __count)
static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_srli_si64 (__m64 __m, int __count)
{
- return (__m64) __builtin_ia32_psrlq ((long long)__m, (long long)__count);
+ return (__m64) __builtin_ia32_psrlqi ((__v1di)__m, __count);
}
static __inline __m64 __attribute__((__always_inline__, __artificial__))
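Nothing changes for users at the intrinsic level; _mm_slli_* now routes to the separate *i builtins while the _mm_sll_*/_mm_srl_*/_mm_sra_* forms keep vector-typed counts. A short usage sketch (illustrative function name; -mmmx assumed):

#include <mmintrin.h>

__m64
scale (__m64 v, __m64 amount)
{
  __m64 t = _mm_srai_pi16 (v, 2);   /* immediate count -> __builtin_ia32_psrawi */
  return _mm_srl_pi16 (t, amount);  /* vector count    -> __builtin_ia32_psrlw  */
}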
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 3371161f82f..1d2a41dd114 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -32,16 +32,18 @@
;; 8 byte integral modes handled by MMX (and by extension, SSE)
(define_mode_iterator MMXMODEI [V8QI V4HI V2SI])
+(define_mode_iterator MMXMODEI8 [V8QI V4HI V2SI V1DI])
;; All 8-byte vector modes handled by MMX
-(define_mode_iterator MMXMODE [V8QI V4HI V2SI V2SF])
+(define_mode_iterator MMXMODE [V8QI V4HI V2SI V1DI V2SF])
;; Mix-n-match
(define_mode_iterator MMXMODE12 [V8QI V4HI])
(define_mode_iterator MMXMODE24 [V4HI V2SI])
+(define_mode_iterator MMXMODE248 [V4HI V2SI V1DI])
;; Mapping from integer vector mode to mnemonic suffix
-(define_mode_attr mmxvecsize [(V8QI "b") (V4HI "w") (V2SI "d") (DI "q")])
+(define_mode_attr mmxvecsize [(V8QI "b") (V4HI "w") (V2SI "d") (V1DI "q")])
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
@@ -53,8 +55,8 @@
;; This is essential for maintaining stable calling conventions.
(define_expand "mov<mode>"
- [(set (match_operand:MMXMODEI 0 "nonimmediate_operand" "")
- (match_operand:MMXMODEI 1 "nonimmediate_operand" ""))]
+ [(set (match_operand:MMXMODEI8 0 "nonimmediate_operand" "")
+ (match_operand:MMXMODEI8 1 "nonimmediate_operand" ""))]
"TARGET_MMX"
{
ix86_expand_vector_move (<MODE>mode, operands);
@@ -62,9 +64,9 @@
})
(define_insn "*mov<mode>_internal_rex64"
- [(set (match_operand:MMXMODEI 0 "nonimmediate_operand"
+ [(set (match_operand:MMXMODEI8 0 "nonimmediate_operand"
"=rm,r,!y,!y ,m ,!y,Y2,x,x ,m,r,x")
- (match_operand:MMXMODEI 1 "vector_move_operand"
+ (match_operand:MMXMODEI8 1 "vector_move_operand"
"Cr ,m,C ,!ym,!y,Y2,!y,C,xm,x,x,r"))]
"TARGET_64BIT && TARGET_MMX
&& !(MEM_P (operands[0]) && MEM_P (operands[1]))"
@@ -86,9 +88,9 @@
(set_attr "mode" "DI")])
(define_insn "*mov<mode>_internal"
- [(set (match_operand:MMXMODEI 0 "nonimmediate_operand"
+ [(set (match_operand:MMXMODEI8 0 "nonimmediate_operand"
"=!y,!y ,m ,!y ,*Y2,*Y2,*Y2 ,m ,*x,*x,*x,m ,?r ,?m")
- (match_operand:MMXMODEI 1 "vector_move_operand"
+ (match_operand:MMXMODEI8 1 "vector_move_operand"
"C ,!ym,!y,*Y2,!y ,C ,*Y2m,*Y2,C ,*x,m ,*x,irm,r"))]
"TARGET_MMX
&& !(MEM_P (operands[0]) && MEM_P (operands[1]))"
@@ -557,26 +559,16 @@
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(define_insn "mmx_add<mode>3"
- [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
- (plus:MMXMODEI
- (match_operand:MMXMODEI 1 "nonimmediate_operand" "%0")
- (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")))]
- "TARGET_MMX && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
+ [(set (match_operand:MMXMODEI8 0 "register_operand" "=y")
+ (plus:MMXMODEI8
+ (match_operand:MMXMODEI8 1 "nonimmediate_operand" "%0")
+ (match_operand:MMXMODEI8 2 "nonimmediate_operand" "ym")))]
+ "(TARGET_MMX || (TARGET_SSE2 && <MODE>mode == V1DImode))
+ && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
"padd<mmxvecsize>\t{%2, %0|%0, %2}"
[(set_attr "type" "mmxadd")
(set_attr "mode" "DI")])
-(define_insn "mmx_adddi3"
- [(set (match_operand:DI 0 "register_operand" "=y")
- (unspec:DI
- [(plus:DI (match_operand:DI 1 "nonimmediate_operand" "%0")
- (match_operand:DI 2 "nonimmediate_operand" "ym"))]
- UNSPEC_NOP))]
- "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, DImode, operands)"
- "paddq\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmxadd")
- (set_attr "mode" "DI")])
-
(define_insn "mmx_ssadd<mode>3"
[(set (match_operand:MMXMODE12 0 "register_operand" "=y")
(ss_plus:MMXMODE12
@@ -598,26 +590,15 @@
(set_attr "mode" "DI")])
(define_insn "mmx_sub<mode>3"
- [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
- (minus:MMXMODEI
- (match_operand:MMXMODEI 1 "register_operand" "0")
- (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")))]
- "TARGET_MMX"
+ [(set (match_operand:MMXMODEI8 0 "register_operand" "=y")
+ (minus:MMXMODEI8
+ (match_operand:MMXMODEI8 1 "register_operand" "0")
+ (match_operand:MMXMODEI8 2 "nonimmediate_operand" "ym")))]
+ "(TARGET_MMX || (TARGET_SSE2 && <MODE>mode == V1DImode))"
"psub<mmxvecsize>\t{%2, %0|%0, %2}"
[(set_attr "type" "mmxadd")
(set_attr "mode" "DI")])
-(define_insn "mmx_subdi3"
- [(set (match_operand:DI 0 "register_operand" "=y")
- (unspec:DI
- [(minus:DI (match_operand:DI 1 "register_operand" "0")
- (match_operand:DI 2 "nonimmediate_operand" "ym"))]
- UNSPEC_NOP))]
- "TARGET_SSE2"
- "psubq\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmxadd")
- (set_attr "mode" "DI")])
-
(define_insn "mmx_sssub<mode>3"
[(set (match_operand:MMXMODE12 0 "register_operand" "=y")
(ss_minus:MMXMODE12
@@ -778,54 +759,32 @@
[(set (match_operand:MMXMODE24 0 "register_operand" "=y")
(ashiftrt:MMXMODE24
(match_operand:MMXMODE24 1 "register_operand" "0")
- (match_operand:DI 2 "nonmemory_operand" "yi")))]
+ (match_operand:SI 2 "nonmemory_operand" "yN")))]
"TARGET_MMX"
"psra<mmxvecsize>\t{%2, %0|%0, %2}"
[(set_attr "type" "mmxshft")
(set_attr "mode" "DI")])
(define_insn "mmx_lshr<mode>3"
- [(set (match_operand:MMXMODE24 0 "register_operand" "=y")
- (lshiftrt:MMXMODE24
- (match_operand:MMXMODE24 1 "register_operand" "0")
- (match_operand:DI 2 "nonmemory_operand" "yi")))]
+ [(set (match_operand:MMXMODE248 0 "register_operand" "=y")
+ (lshiftrt:MMXMODE248
+ (match_operand:MMXMODE248 1 "register_operand" "0")
+ (match_operand:SI 2 "nonmemory_operand" "yN")))]
"TARGET_MMX"
"psrl<mmxvecsize>\t{%2, %0|%0, %2}"
[(set_attr "type" "mmxshft")
(set_attr "mode" "DI")])
-(define_insn "mmx_lshrdi3"
- [(set (match_operand:DI 0 "register_operand" "=y")
- (unspec:DI
- [(lshiftrt:DI (match_operand:DI 1 "register_operand" "0")
- (match_operand:DI 2 "nonmemory_operand" "yi"))]
- UNSPEC_NOP))]
- "TARGET_MMX"
- "psrlq\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmxshft")
- (set_attr "mode" "DI")])
-
(define_insn "mmx_ashl<mode>3"
- [(set (match_operand:MMXMODE24 0 "register_operand" "=y")
- (ashift:MMXMODE24
- (match_operand:MMXMODE24 1 "register_operand" "0")
- (match_operand:DI 2 "nonmemory_operand" "yi")))]
+ [(set (match_operand:MMXMODE248 0 "register_operand" "=y")
+ (ashift:MMXMODE248
+ (match_operand:MMXMODE248 1 "register_operand" "0")
+ (match_operand:SI 2 "nonmemory_operand" "yN")))]
"TARGET_MMX"
"psll<mmxvecsize>\t{%2, %0|%0, %2}"
[(set_attr "type" "mmxshft")
(set_attr "mode" "DI")])
-(define_insn "mmx_ashldi3"
- [(set (match_operand:DI 0 "register_operand" "=y")
- (unspec:DI
- [(ashift:DI (match_operand:DI 1 "register_operand" "0")
- (match_operand:DI 2 "nonmemory_operand" "yi"))]
- UNSPEC_NOP))]
- "TARGET_MMX"
- "psllq\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmxshft")
- (set_attr "mode" "DI")])
-
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel integral comparisons
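Since the 64-bit MMX shifts are now real (ashift:V1DI ...) / (lshiftrt:V1DI ...) insns rather than UNSPEC_NOP wrappers, the RTL optimizers can reason about them like any other shift. A sketch of code that now expands through mmx_ashlv1di3 (illustrative function name; -mmmx assumed):

#include <mmintrin.h>

__m64
shl5 (__m64 v)
{
  return _mm_slli_si64 (v, 5);  /* psllq via the new mmx_ashlv1di3 pattern */
}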
diff --git a/gcc/config/i386/smmintrin.h b/gcc/config/i386/smmintrin.h
index e5b0ae04c0f..08bfd107232 100644
--- a/gcc/config/i386/smmintrin.h
+++ b/gcc/config/i386/smmintrin.h
@@ -470,30 +470,30 @@ _mm_stream_load_si128 (__m128i *__X)
#ifdef __SSE4_2__
/* These macros specify the source data format. */
-#define SIDD_UBYTE_OPS 0x00
-#define SIDD_UWORD_OPS 0x01
-#define SIDD_SBYTE_OPS 0x02
-#define SIDD_SWORD_OPS 0x03
+#define _SIDD_UBYTE_OPS 0x00
+#define _SIDD_UWORD_OPS 0x01
+#define _SIDD_SBYTE_OPS 0x02
+#define _SIDD_SWORD_OPS 0x03
/* These macros specify the comparison operation. */
-#define SIDD_CMP_EQUAL_ANY 0x00
-#define SIDD_CMP_RANGES 0x04
-#define SIDD_CMP_EQUAL_EACH 0x08
-#define SIDD_CMP_EQUAL_ORDERED 0x0c
+#define _SIDD_CMP_EQUAL_ANY 0x00
+#define _SIDD_CMP_RANGES 0x04
+#define _SIDD_CMP_EQUAL_EACH 0x08
+#define _SIDD_CMP_EQUAL_ORDERED 0x0c
/* These macros specify the polarity. */
-#define SIDD_POSITIVE_POLARITY 0x00
-#define SIDD_NEGATIVE_POLARITY 0x10
-#define SIDD_MASKED_POSITIVE_POLARITY 0x20
-#define SIDD_MASKED_NEGATIVE_POLARITY 0x30
+#define _SIDD_POSITIVE_POLARITY 0x00
+#define _SIDD_NEGATIVE_POLARITY 0x10
+#define _SIDD_MASKED_POSITIVE_POLARITY 0x20
+#define _SIDD_MASKED_NEGATIVE_POLARITY 0x30
/* These macros specify the output selection in _mm_cmpXstri (). */
-#define SIDD_LEAST_SIGNIFICANT 0x00
-#define SIDD_MOST_SIGNIFICANT 0x40
+#define _SIDD_LEAST_SIGNIFICANT 0x00
+#define _SIDD_MOST_SIGNIFICANT 0x40
/* These macros specify the output selection in _mm_cmpXstrm (). */
-#define SIDD_BIT_MASK 0x00
-#define SIDD_UNIT_MASK 0x40
+#define _SIDD_BIT_MASK 0x00
+#define _SIDD_UNIT_MASK 0x40
/* Intrinsics for text/string processing. */
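The renamed constants keep their documented encodings; only the reserved-namespace underscore prefix is new. A usage sketch with the SSE4.2 string intrinsics (illustrative helper, treating a zero byte as a terminator as the implicit-length forms do; -msse4.2 assumed):

#include <smmintrin.h>

/* Index of the first differing byte: compare byte-for-byte equality,
   then invert the polarity so the first mismatch is reported.  */
int
first_mismatch (__m128i a, __m128i b)
{
  return _mm_cmpistri (a, b,
                       _SIDD_UBYTE_OPS
                       | _SIDD_CMP_EQUAL_EACH
                       | _SIDD_NEGATIVE_POLARITY);
}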
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 97250dbd2ed..8ea5bc0048b 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -5969,7 +5969,7 @@
(mult:V8HI
(zero_extend:V8HI
(vec_select:V4QI
- (match_operand:V16QI 1 "nonimmediate_operand" "%0")
+ (match_operand:V16QI 1 "nonimmediate_operand" "0")
(parallel [(const_int 0)
(const_int 2)
(const_int 4)
@@ -6023,7 +6023,7 @@
(mult:V4HI
(zero_extend:V4HI
(vec_select:V4QI
- (match_operand:V8QI 1 "nonimmediate_operand" "%0")
+ (match_operand:V8QI 1 "nonimmediate_operand" "0")
(parallel [(const_int 0)
(const_int 2)
(const_int 4)
diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index 2261a9369f3..95c588ca070 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -64,7 +64,6 @@
(UNSPEC_VPKUWUS 102)
(UNSPEC_VPKSWUS 103)
(UNSPEC_VRL 104)
- (UNSPEC_VSL 107)
(UNSPEC_VSLV4SI 110)
(UNSPEC_VSLO 111)
(UNSPEC_VSR 118)
@@ -576,7 +575,7 @@
/* Generate [-0.0, -0.0, -0.0, -0.0]. */
neg0 = gen_reg_rtx (V4SImode);
emit_insn (gen_altivec_vspltisw (neg0, constm1_rtx));
- emit_insn (gen_altivec_vslw (neg0, neg0, neg0));
+ emit_insn (gen_ashlv4si3 (neg0, neg0, neg0));
/* Use the multiply-add. */
emit_insn (gen_altivec_vmaddfp (operands[0], operands[1], operands[2],
@@ -635,7 +634,7 @@
high_product = gen_reg_rtx (V4SImode);
emit_insn (gen_altivec_vmsumuhm (high_product, one, small_swap, zero));
- emit_insn (gen_altivec_vslw (high_product, high_product, sixteen));
+ emit_insn (gen_ashlv4si3 (high_product, high_product, sixteen));
emit_insn (gen_addv4si3 (operands[0], high_product, low_product));
@@ -1221,15 +1220,6 @@
"vrl<VI_char> %0,%1,%2"
[(set_attr "type" "vecsimple")])
-(define_insn "altivec_vsl<VI_char>"
- [(set (match_operand:VI 0 "register_operand" "=v")
- (unspec:VI [(match_operand:VI 1 "register_operand" "v")
- (match_operand:VI 2 "register_operand" "v")]
- UNSPEC_VSL))]
- "TARGET_ALTIVEC"
- "vsl<VI_char> %0,%1,%2"
- [(set_attr "type" "vecsimple")])
-
(define_insn "altivec_vsl"
[(set (match_operand:V4SI 0 "register_operand" "=v")
(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v")
@@ -1248,6 +1238,14 @@
"vslo %0,%1,%2"
[(set_attr "type" "vecperm")])
+(define_insn "ashl<mode>3"
+ [(set (match_operand:VI 0 "register_operand" "=v")
+ (ashift:VI (match_operand:VI 1 "register_operand" "v")
+ (match_operand:VI 2 "register_operand" "v")))]
+ "TARGET_ALTIVEC"
+ "vsl<VI_char> %0,%1,%2"
+ [(set_attr "type" "vecsimple")])
+
(define_insn "lshr<mode>3"
[(set (match_operand:VI 0 "register_operand" "=v")
(lshiftrt:VI (match_operand:VI 1 "register_operand" "v")
@@ -2039,7 +2037,7 @@
[(set (match_dup 2)
(vec_duplicate:V4SI (const_int -1)))
(set (match_dup 3)
- (unspec:V4SI [(match_dup 2) (match_dup 2)] UNSPEC_VSL))
+ (ashift:V4SI (match_dup 2) (match_dup 2)))
(set (match_operand:V4SF 0 "register_operand" "=v")
(and:V4SF (not:V4SF (subreg:V4SF (match_dup 3) 0))
(match_operand:V4SF 1 "register_operand" "v")))]
@@ -2642,7 +2640,7 @@
/* Generate [-0.0, -0.0, -0.0, -0.0]. */
neg0 = gen_reg_rtx (V4SImode);
emit_insn (gen_altivec_vspltisw (neg0, constm1_rtx));
- emit_insn (gen_altivec_vslw (neg0, neg0, neg0));
+ emit_insn (gen_ashlv4si3 (neg0, neg0, neg0));
/* XOR */
emit_insn (gen_xorv4sf3 (operands[0],
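With vector shift-left modeled as the generic ashl<mode>3 pattern (and UNSPEC_VSL gone), the same vsl<VI_char> instructions are now reachable from ordinary vector code. A sketch using the AltiVec intrinsics (illustrative function name; -maltivec assumed):

#include <altivec.h>

vector unsigned int
shift_words (vector unsigned int v, vector unsigned int amt)
{
  return vec_sl (v, amt);  /* expands through ashlv4si3 */
}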
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 30f226bf46e..593408f56d5 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -732,7 +732,6 @@ static const char *rs6000_invalid_within_doloop (const_rtx);
static rtx rs6000_generate_compare (enum rtx_code);
static void rs6000_emit_stack_tie (void);
static void rs6000_frame_related (rtx, rtx, HOST_WIDE_INT, rtx, rtx);
-static rtx spe_synthesize_frame_save (rtx);
static bool spe_func_has_64bit_regs_p (void);
static void emit_frame_save (rtx, rtx, enum machine_mode, unsigned int,
int, HOST_WIDE_INT);
@@ -3616,6 +3615,7 @@ rs6000_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
&& GET_CODE (XEXP (x, 1)) == CONST_INT
&& (unsigned HOST_WIDE_INT) (INTVAL (XEXP (x, 1)) + 0x8000) >= 0x10000
&& !(SPE_VECTOR_MODE (mode)
+ || ALTIVEC_VECTOR_MODE (mode)
|| (TARGET_E500_DOUBLE && (mode == DFmode || mode == TFmode
|| mode == DImode))))
{
@@ -3633,11 +3633,12 @@ rs6000_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
&& GET_MODE_NUNITS (mode) == 1
&& ((TARGET_HARD_FLOAT && TARGET_FPRS)
|| TARGET_POWERPC64
- || (((mode != DImode && mode != DFmode && mode != DDmode)
- || TARGET_E500_DOUBLE)
- && mode != TFmode && mode != TDmode))
+ || ((mode != DImode && mode != DFmode && mode != DDmode)
+ || TARGET_E500_DOUBLE))
&& (TARGET_POWERPC64 || mode != DImode)
- && mode != TImode)
+ && mode != TImode
+ && mode != TFmode
+ && mode != TDmode)
{
return gen_rtx_PLUS (Pmode, XEXP (x, 0),
force_reg (Pmode, force_operand (XEXP (x, 1), 0)));
@@ -7089,9 +7090,9 @@ static struct builtin_description bdesc_2arg[] =
{ MASK_ALTIVEC, CODE_FOR_altivec_vrlb, "__builtin_altivec_vrlb", ALTIVEC_BUILTIN_VRLB },
{ MASK_ALTIVEC, CODE_FOR_altivec_vrlh, "__builtin_altivec_vrlh", ALTIVEC_BUILTIN_VRLH },
{ MASK_ALTIVEC, CODE_FOR_altivec_vrlw, "__builtin_altivec_vrlw", ALTIVEC_BUILTIN_VRLW },
- { MASK_ALTIVEC, CODE_FOR_altivec_vslb, "__builtin_altivec_vslb", ALTIVEC_BUILTIN_VSLB },
- { MASK_ALTIVEC, CODE_FOR_altivec_vslh, "__builtin_altivec_vslh", ALTIVEC_BUILTIN_VSLH },
- { MASK_ALTIVEC, CODE_FOR_altivec_vslw, "__builtin_altivec_vslw", ALTIVEC_BUILTIN_VSLW },
+ { MASK_ALTIVEC, CODE_FOR_ashlv16qi3, "__builtin_altivec_vslb", ALTIVEC_BUILTIN_VSLB },
+ { MASK_ALTIVEC, CODE_FOR_ashlv8hi3, "__builtin_altivec_vslh", ALTIVEC_BUILTIN_VSLH },
+ { MASK_ALTIVEC, CODE_FOR_ashlv4si3, "__builtin_altivec_vslw", ALTIVEC_BUILTIN_VSLW },
{ MASK_ALTIVEC, CODE_FOR_altivec_vsl, "__builtin_altivec_vsl", ALTIVEC_BUILTIN_VSL },
{ MASK_ALTIVEC, CODE_FOR_altivec_vslo, "__builtin_altivec_vslo", ALTIVEC_BUILTIN_VSLO },
{ MASK_ALTIVEC, CODE_FOR_altivec_vspltb, "__builtin_altivec_vspltb", ALTIVEC_BUILTIN_VSPLTB },
@@ -13902,6 +13903,9 @@ rs6000_expand_compare_and_swapqhi (rtx dst, rtx mem, rtx oldval, rtx newval)
emit_insn (gen_sync_compare_and_swapqhi_internal (wdst, mask,
oldval, newval, mem));
+ /* Shift the result back. */
+ emit_insn (gen_lshrsi3 (wdst, wdst, shift));
+
emit_move_insn (dst, gen_lowpart (mode, wdst));
}
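The added gen_lshrsi3 matters because the narrow operand is operated on shifted up inside an aligned word; without shifting the result back down, gen_lowpart would extract the wrong bytes. A C-level model of the flow (hypothetical names, not the emitted RTL):

unsigned short
cas_result_model (unsigned int word_result, unsigned int shift)
{
  /* The word-sized compare-and-swap leaves the updated value at bit
     offset SHIFT; shift it back before taking the low part.  */
  return (unsigned short) (word_result >> shift);
}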
@@ -15386,77 +15390,12 @@ rs6000_frame_related (rtx insn, rtx reg, HOST_WIDE_INT val,
}
}
- if (TARGET_SPE)
- real = spe_synthesize_frame_save (real);
-
RTX_FRAME_RELATED_P (insn) = 1;
REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
real,
REG_NOTES (insn));
}
-/* Given an SPE frame note, return a PARALLEL of SETs with the
- original note, plus a synthetic register save. */
-
-static rtx
-spe_synthesize_frame_save (rtx real)
-{
- rtx synth, offset, reg, real2;
-
- if (GET_CODE (real) != SET
- || GET_MODE (SET_SRC (real)) != V2SImode)
- return real;
-
- /* For the SPE, registers saved in 64-bits, get a PARALLEL for their
- frame related note. The parallel contains a set of the register
- being saved, and another set to a synthetic register (n+1200).
- This is so we can differentiate between 64-bit and 32-bit saves.
- Words cannot describe this nastiness. */
-
- gcc_assert (GET_CODE (SET_DEST (real)) == MEM
- && GET_CODE (XEXP (SET_DEST (real), 0)) == PLUS
- && GET_CODE (SET_SRC (real)) == REG);
-
- /* Transform:
- (set (mem (plus (reg x) (const y)))
- (reg z))
- into:
- (set (mem (plus (reg x) (const y+4)))
- (reg z+1200))
- */
-
- real2 = copy_rtx (real);
- PUT_MODE (SET_DEST (real2), SImode);
- reg = SET_SRC (real2);
- real2 = replace_rtx (real2, reg, gen_rtx_REG (SImode, REGNO (reg)));
- synth = copy_rtx (real2);
-
- if (BYTES_BIG_ENDIAN)
- {
- offset = XEXP (XEXP (SET_DEST (real2), 0), 1);
- real2 = replace_rtx (real2, offset, GEN_INT (INTVAL (offset) + 4));
- }
-
- reg = SET_SRC (synth);
-
- synth = replace_rtx (synth, reg,
- gen_rtx_REG (SImode, REGNO (reg) + 1200));
-
- offset = XEXP (XEXP (SET_DEST (synth), 0), 1);
- synth = replace_rtx (synth, offset,
- GEN_INT (INTVAL (offset)
- + (BYTES_BIG_ENDIAN ? 0 : 4)));
-
- RTX_FRAME_RELATED_P (synth) = 1;
- RTX_FRAME_RELATED_P (real2) = 1;
- if (BYTES_BIG_ENDIAN)
- real = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, synth, real2));
- else
- real = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, real2, synth));
-
- return real;
-}
-
/* Returns an insn that has a vrsave set operation with the
appropriate CLOBBERs. */