author    bstarynk <bstarynk@138bc75d-0d04-0410-961f-82ee72b054a4>  2008-03-11 15:19:27 +0000
committer bstarynk <bstarynk@138bc75d-0d04-0410-961f-82ee72b054a4>  2008-03-11 15:19:27 +0000
commit    84e39114ef36f4f202028695936e3725b9a08474
tree      1b76344540036a57ec23806ad0e4e22b62c916c3 /gcc/config
parent    f778423557464b07c76cbf36e2032cccb94c02bc
2008-03-11 Basile Starynkevitch <basile@starynkevitch.net>
merged with trunk r133107
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/melt-branch@133113 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc/config')
28 files changed, 1165 insertions, 312 deletions
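Among the new files in this diff, gcc/config/arm/bpabi-v6m.S reimplements the EABI long-long comparison helpers in pure Thumb-1 assembly. For orientation, a C sketch of the contract those routines satisfy follows; the _ref names are editorial stand-ins, and per the ARM run-time ABI only the sign of the result matters:

/* Reference semantics for the __aeabi_lcmp/__aeabi_ulcmp helpers that
   bpabi-v6m.S below implements in Thumb-1 assembly.  These sketches
   are illustrative, not part of the patch.  */
int __aeabi_lcmp_ref (long long a, long long b)
{
  return (a < b) ? -1 : (a > b);	/* negative, zero, or positive */
}

int __aeabi_ulcmp_ref (unsigned long long a, unsigned long long b)
{
  return (a < b) ? -1 : (a > b);
}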
diff --git a/gcc/config/arm/arm-cores.def b/gcc/config/arm/arm-cores.def index 748dcb074d1..cce3195a453 100644 --- a/gcc/config/arm/arm-cores.def +++ b/gcc/config/arm/arm-cores.def @@ -1,5 +1,5 @@ /* ARM CPU Cores - Copyright (C) 2003, 2005, 2006, 2007 Free Software Foundation, Inc. + Copyright (C) 2003, 2005, 2006, 2007, 2008 Free Software Foundation, Inc. Written by CodeSourcery, LLC This file is part of GCC. @@ -118,3 +118,4 @@ ARM_CORE("arm1156t2-s", arm1156t2s, 6T2, FL_LDSCHED, 9e) ARM_CORE("cortex-a8", cortexa8, 7A, FL_LDSCHED, 9e) ARM_CORE("cortex-r4", cortexr4, 7R, FL_LDSCHED, 9e) ARM_CORE("cortex-m3", cortexm3, 7M, FL_LDSCHED, 9e) +ARM_CORE("cortex-m1", cortexm1, 6M, FL_LDSCHED, 9e) diff --git a/gcc/config/arm/arm-tune.md b/gcc/config/arm/arm-tune.md index 5b4c46f07b7..d73382bc920 100644 --- a/gcc/config/arm/arm-tune.md +++ b/gcc/config/arm/arm-tune.md @@ -1,5 +1,5 @@ ;; -*- buffer-read-only: t -*- ;; Generated automatically by gentune.sh from arm-cores.def (define_attr "tune" - "arm2,arm250,arm3,arm6,arm60,arm600,arm610,arm620,arm7,arm7d,arm7di,arm70,arm700,arm700i,arm710,arm720,arm710c,arm7100,arm7500,arm7500fe,arm7m,arm7dm,arm7dmi,arm8,arm810,strongarm,strongarm110,strongarm1100,strongarm1110,arm7tdmi,arm7tdmis,arm710t,arm720t,arm740t,arm9,arm9tdmi,arm920,arm920t,arm922t,arm940t,ep9312,arm10tdmi,arm1020t,arm9e,arm946es,arm966es,arm968es,arm10e,arm1020e,arm1022e,xscale,iwmmxt,arm926ejs,arm1026ejs,arm1136js,arm1136jfs,arm1176jzs,arm1176jzfs,mpcorenovfp,mpcore,arm1156t2s,cortexa8,cortexr4,cortexm3" + "arm2,arm250,arm3,arm6,arm60,arm600,arm610,arm620,arm7,arm7d,arm7di,arm70,arm700,arm700i,arm710,arm720,arm710c,arm7100,arm7500,arm7500fe,arm7m,arm7dm,arm7dmi,arm8,arm810,strongarm,strongarm110,strongarm1100,strongarm1110,arm7tdmi,arm7tdmis,arm710t,arm720t,arm740t,arm9,arm9tdmi,arm920,arm920t,arm922t,arm940t,ep9312,arm10tdmi,arm1020t,arm9e,arm946es,arm966es,arm968es,arm10e,arm1020e,arm1022e,xscale,iwmmxt,arm926ejs,arm1026ejs,arm1136js,arm1136jfs,arm1176jzs,arm1176jzfs,mpcorenovfp,mpcore,arm1156t2s,cortexa8,cortexr4,cortexm3,cortexm1" (const (symbol_ref "arm_tune"))) diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index c66b51ec328..bddb0e2771a 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -1,6 +1,6 @@ /* Output routines for GCC for ARM. Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, - 2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc. + 2002, 2003, 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc. Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl) and Martin Simmons (@harleqn.co.uk). More major hacks by Richard Earnshaw (rearnsha@arm.com). 
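The first arm.c hunks below declare arm_issue_rate and install it as TARGET_SCHED_ISSUE_RATE. As a hedged sketch of the mechanism, assuming only what the hunks show: the backend publishes an issue-rate callback through a target hook vtable, and the generic scheduler treats a missing callback as single issue. Everything here other than the names arm_issue_rate and TARGET_SCHED_ISSUE_RATE is a toy model, not GCC's real plumbing.

#include <stdio.h>

/* Toy model of the target-hook indirection.  */
struct sched_hooks { int (*issue_rate) (void); };

static int model_issue_rate (void)	/* stands in for arm_issue_rate */
{
  return 2;				/* the cortex-a8/cortex-r4 case */
}

int main (void)
{
  struct sched_hooks hooks = { model_issue_rate };
  /* A null hook means single issue, mirroring the scheduler default.  */
  int rate = hooks.issue_rate ? hooks.issue_rate () : 1;
  printf ("insns issued per cycle: %d\n", rate);
  return 0;
}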
@@ -188,6 +188,7 @@ static void arm_target_help (void); static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode); static bool arm_cannot_copy_insn_p (rtx); static bool arm_tls_symbol_p (rtx x); +static int arm_issue_rate (void); static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED; @@ -358,6 +359,9 @@ static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED; #undef TARGET_CANNOT_FORCE_CONST_MEM #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem +#undef TARGET_SCHED_ISSUE_RATE +#define TARGET_SCHED_ISSUE_RATE arm_issue_rate + #undef TARGET_MANGLE_TYPE #define TARGET_MANGLE_TYPE arm_mangle_type @@ -460,6 +464,7 @@ static int thumb_call_reg_needed; #define FL_FOR_ARCH6Z FL_FOR_ARCH6 #define FL_FOR_ARCH6ZK FL_FOR_ARCH6K #define FL_FOR_ARCH6T2 (FL_FOR_ARCH6 | FL_THUMB2) +#define FL_FOR_ARCH6M (FL_FOR_ARCH6 & ~FL_NOTM) #define FL_FOR_ARCH7 (FL_FOR_ARCH6T2 &~ FL_NOTM) #define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM) #define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_DIV) @@ -632,6 +637,7 @@ static const struct processors all_architectures[] = {"armv6z", arm1176jzs, "6Z", FL_CO_PROC | FL_FOR_ARCH6Z, NULL}, {"armv6zk", arm1176jzs, "6ZK", FL_CO_PROC | FL_FOR_ARCH6ZK, NULL}, {"armv6t2", arm1156t2s, "6T2", FL_CO_PROC | FL_FOR_ARCH6T2, NULL}, + {"armv6-m", cortexm1, "6M", FL_FOR_ARCH6M, NULL}, {"armv7", cortexa8, "7", FL_CO_PROC | FL_FOR_ARCH7, NULL}, {"armv7-a", cortexa8, "7A", FL_CO_PROC | FL_FOR_ARCH7A, NULL}, {"armv7-r", cortexr4, "7R", FL_CO_PROC | FL_FOR_ARCH7R, NULL}, @@ -17639,12 +17645,23 @@ arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED, ? 1 : 0); if (mi_delta < 0) mi_delta = - mi_delta; - /* When generating 16-bit thumb code, thunks are entered in arm mode. */ + if (TARGET_THUMB1) { int labelno = thunk_label++; ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno); - fputs ("\tldr\tr12, ", file); + /* Thunks are entered in arm mode when available. */ + if (TARGET_THUMB1_ONLY) + { + /* Push r3 so we can use it as a temporary. */ + /* TODO: Omit this save if r3 is not used. */ + fputs ("\tpush {r3}\n", file); + fputs ("\tldr\tr3, ", file); + } + else + { + fputs ("\tldr\tr12, ", file); + } assemble_name (file, label); fputc ('\n', file); if (flag_pic) @@ -17658,29 +17675,63 @@ arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED, Note that we have "+ 1" because some versions of GNU ld don't set the low bit of the result for R_ARM_REL32 - relocations against thumb function symbols. */ + relocations against thumb function symbols. + On ARMv6M this is +4, not +8. */ ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno); assemble_name (file, labelpc); fputs (":\n", file); - fputs ("\tadd\tr12, pc, r12\n", file); + if (TARGET_THUMB1_ONLY) + { + /* This is 2 insns after the start of the thunk, so we know it + is 4-byte aligned. */ + fputs ("\tadd\tr3, pc, r3\n", file); + fputs ("\tmov r12, r3\n", file); + } + else + fputs ("\tadd\tr12, pc, r12\n", file); } + else if (TARGET_THUMB1_ONLY) + fputs ("\tmov r12, r3\n", file); } - /* TODO: Use movw/movt for large constants when available. 
*/ - while (mi_delta != 0) + if (TARGET_THUMB1_ONLY) { - if ((mi_delta & (3 << shift)) == 0) - shift += 2; - else - { - asm_fprintf (file, "\t%s\t%r, %r, #%d\n", - mi_op, this_regno, this_regno, - mi_delta & (0xff << shift)); - mi_delta &= ~(0xff << shift); - shift += 8; - } + if (mi_delta > 255) + { + fputs ("\tldr\tr3, ", file); + assemble_name (file, label); + fputs ("+4\n", file); + asm_fprintf (file, "\t%s\t%r, %r, r3\n", + mi_op, this_regno, this_regno); + } + else if (mi_delta != 0) + { + asm_fprintf (file, "\t%s\t%r, %r, #%d\n", + mi_op, this_regno, this_regno, + mi_delta); + } + } + else + { + /* TODO: Use movw/movt for large constants when available. */ + while (mi_delta != 0) + { + if ((mi_delta & (3 << shift)) == 0) + shift += 2; + else + { + asm_fprintf (file, "\t%s\t%r, %r, #%d\n", + mi_op, this_regno, this_regno, + mi_delta & (0xff << shift)); + mi_delta &= ~(0xff << shift); + shift += 8; + } + } } if (TARGET_THUMB1) { + if (TARGET_THUMB1_ONLY) + fputs ("\tpop\t{r3}\n", file); + fprintf (file, "\tbx\tr12\n"); ASM_OUTPUT_ALIGN (file, 2); assemble_name (file, label); @@ -17699,6 +17750,9 @@ arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED, else /* Output ".word .LTHUNKn". */ assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1); + + if (TARGET_THUMB1_ONLY && mi_delta > 255) + assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1); } else { @@ -18660,6 +18714,22 @@ thumb2_output_casesi (rtx *operands) } } +/* Most ARM cores are single issue, but some newer ones can dual issue. + The scheduler descriptions rely on this being correct. */ +static int +arm_issue_rate (void) +{ + switch (arm_tune) + { + case cortexr4: + case cortexa8: + return 2; + + default: + return 1; + } +} + /* A table and a function to perform ARM-specific name mangling for NEON vector types in order to conform to the AAPCS (see "Procedure Call Standard for the ARM Architecture", Appendix A). To qualify diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h index fcb90ab3ca6..d93476ba77c 100644 --- a/gcc/config/arm/arm.h +++ b/gcc/config/arm/arm.h @@ -1,6 +1,7 @@ /* Definitions of target machine for GNU compiler, for ARM. Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, - 2001, 2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc. + 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 + Free Software Foundation, Inc. Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl) and Martin Simmons (@harleqn.co.uk). More major hacks by Richard Earnshaw (rearnsha@arm.com) @@ -207,6 +208,8 @@ extern void (*arm_lang_output_object_attributes_hook)(void); #define TARGET_32BIT (TARGET_ARM || arm_arch_thumb2) /* 32-bit Thumb-2 code. */ #define TARGET_THUMB2 (TARGET_THUMB && arm_arch_thumb2) +/* Thumb-1 only. */ +#define TARGET_THUMB1_ONLY (TARGET_THUMB1 && !arm_arch_notm) /* The following two macros concern the ability to execute coprocessor instructions for VFPv3 or NEON. 
TARGET_VFP3 is currently only ever @@ -2397,7 +2400,8 @@ extern int making_const_table; if (TARGET_THUMB) \ { \ if (is_called_in_ARM_mode (DECL) \ - || (TARGET_THUMB1 && current_function_is_thunk)) \ + || (TARGET_THUMB1 && !TARGET_THUMB1_ONLY \ + && current_function_is_thunk)) \ fprintf (STREAM, "\t.code 32\n") ; \ else if (TARGET_THUMB1) \ fprintf (STREAM, "\t.code\t16\n\t.thumb_func\n") ; \ diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md index 765b89bb84c..5b40449cee1 100644 --- a/gcc/config/arm/arm.md +++ b/gcc/config/arm/arm.md @@ -1,6 +1,7 @@ ;;- Machine description for ARM for GNU compiler ;; Copyright 1991, 1993, 1994, 1995, 1996, 1996, 1997, 1998, 1999, 2000, -;; 2001, 2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc. +;; 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 +;; Free Software Foundation, Inc. ;; Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl) ;; and Martin Simmons (@harleqn.co.uk). ;; More major hacks by Richard Earnshaw (rearnsha@arm.com). @@ -93,9 +94,9 @@ (UNSPEC_TLS 20) ; A symbol that has been treated properly for TLS usage. (UNSPEC_PIC_LABEL 21) ; A label used for PIC access that does not appear in the ; instruction stream. - (UNSPEC_STACK_ALIGN 20) ; Doubleword aligned stack pointer. Used to + (UNSPEC_STACK_ALIGN 22) ; Doubleword aligned stack pointer. Used to ; generate correct unwind information. - (UNSPEC_PIC_OFFSET 22) ; A symbolic 12-bit OFFSET that has been treated + (UNSPEC_PIC_OFFSET 23) ; A symbolic 12-bit OFFSET that has been treated ; correctly for PIC usage. ] ) @@ -183,7 +184,7 @@ ;; scheduling information. (define_attr "insn" - "mov,mvn,smulxy,smlaxy,smlalxy,smulwy,smlawx,mul,muls,mla,mlas,umull,umulls,umlal,umlals,smull,smulls,smlal,smlals,smlawy,smuad,smuadx,smlad,smladx,smusd,smusdx,smlsd,smlsdx,smmul,smmulr,smmla,umaal,smlald,smlsld,clz,mrs,msr,xtab,other" + "mov,mvn,smulxy,smlaxy,smlalxy,smulwy,smlawx,mul,muls,mla,mlas,umull,umulls,umlal,umlals,smull,smulls,smlal,smlals,smlawy,smuad,smuadx,smlad,smladx,smusd,smusdx,smlsd,smlsdx,smmul,smmulr,smmla,umaal,smlald,smlsld,clz,mrs,msr,xtab,sdiv,udiv,other" (const_string "other")) ; TYPE attribute is used to detect floating point instructions which, if @@ -332,7 +333,7 @@ (define_attr "generic_sched" "yes,no" (const (if_then_else - (eq_attr "tune" "arm926ejs,arm1020e,arm1026ejs,arm1136js,arm1136jfs,cortexa8") + (eq_attr "tune" "arm926ejs,arm1020e,arm1026ejs,arm1136js,arm1136jfs,cortexa8,cortexr4") (const_string "no") (const_string "yes")))) @@ -349,6 +350,7 @@ (include "arm1026ejs.md") (include "arm1136jfs.md") (include "cortex-a8.md") +(include "cortex-r4.md") ;;--------------------------------------------------------------------------- diff --git a/gcc/config/arm/bpabi-v6m.S b/gcc/config/arm/bpabi-v6m.S new file mode 100644 index 00000000000..fa3b9c41478 --- /dev/null +++ b/gcc/config/arm/bpabi-v6m.S @@ -0,0 +1,280 @@ +/* Miscellaneous BPABI functions. ARMv6M implementation + + Copyright (C) 2006, 2008 Free Software Foundation, Inc. + Contributed by CodeSourcery. + + This file is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the + Free Software Foundation; either version 2, or (at your option) any + later version. 
+ + In addition to the permissions in the GNU General Public License, the + Free Software Foundation gives you unlimited permission to link the + compiled version of this file into combinations with other programs, + and to distribute those combinations without any restriction coming + from the use of this file. (The General Public License restrictions + do apply in other respects; for example, they cover modification of + the file, and distribution when not linked into a combine + executable.) + + This file is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; see the file COPYING. If not, write to + the Free Software Foundation, 51 Franklin Street, Fifth Floor, + Boston, MA 02110-1301, USA. */ + +#ifdef __ARMEB__ +#define xxh r0 +#define xxl r1 +#define yyh r2 +#define yyl r3 +#else +#define xxh r1 +#define xxl r0 +#define yyh r3 +#define yyl r2 +#endif + +#ifdef L_aeabi_lcmp + +FUNC_START aeabi_lcmp + cmp xxh, yyh + beq 1f + bgt 2f + mov r0, #1 + neg r0, r0 + RET +2: + mov r0, #1 + RET +1: + sub r0, xxl, yyl + beq 1f + bhi 2f + mov r0, #1 + neg r0, r0 + RET +2: + mov r0, #1 +1: + RET + FUNC_END aeabi_lcmp + +#endif /* L_aeabi_lcmp */ + +#ifdef L_aeabi_ulcmp + +FUNC_START aeabi_ulcmp + cmp xxh, yyh + bne 1f + sub r0, xxl, yyl + beq 2f +1: + bcs 1f + mov r0, #1 + neg r0, r0 + RET +1: + mov r0, #1 +2: + RET + FUNC_END aeabi_ulcmp + +#endif /* L_aeabi_ulcmp */ + +#ifdef L_aeabi_ldivmod + +FUNC_START aeabi_ldivmod + push {r0, r1} + mov r0, sp + push {r0, lr} + ldr r0, [sp, #8] + bl SYM(__gnu_ldivmod_helper) + ldr r3, [sp, #4] + mov lr, r3 + add sp, sp, #8 + pop {r2, r3} + RET + FUNC_END aeabi_ldivmod + +#endif /* L_aeabi_ldivmod */ + +#ifdef L_aeabi_uldivmod + +FUNC_START aeabi_uldivmod + push {r0, r1} + mov r0, sp + push {r0, lr} + ldr r0, [sp, #8] + bl SYM(__gnu_uldivmod_helper) + ldr r3, [sp, #4] + mov lr, r3 + add sp, sp, #8 + pop {r2, r3} + RET + FUNC_END aeabi_uldivmod + +#endif /* L_aeabi_uldivmod */ + +#ifdef L_arm_addsubsf3 + +FUNC_START aeabi_frsub + + push {r4, lr} + mov r4, #1 + lsl r4, #31 + eor r0, r0, r4 + bl __aeabi_fadd + pop {r4, pc} + + FUNC_END aeabi_frsub + +#endif /* L_arm_addsubsf3 */ + +#ifdef L_arm_cmpsf2 + +FUNC_START aeabi_cfrcmple + + mov ip, r0 + mov r0, r1 + mov r1, ip + b 6f + +FUNC_START aeabi_cfcmpeq +FUNC_ALIAS aeabi_cfcmple aeabi_cfcmpeq + + @ The status-returning routines are required to preserve all + @ registers except ip, lr, and cpsr. +6: push {r0, r1, r2, r3, r4, lr} + bl __lesf2 + @ Set the Z flag correctly, and the C flag unconditionally. + cmp r0, #0 + @ Clear the C flag if the return value was -1, indicating + @ that the first operand was smaller than the second. 
+ bmi 1f + mov r1, #0 + cmn r0, r1 +1: + pop {r0, r1, r2, r3, r4, pc} + + FUNC_END aeabi_cfcmple + FUNC_END aeabi_cfcmpeq + FUNC_END aeabi_cfrcmple + +FUNC_START aeabi_fcmpeq + + push {r4, lr} + bl __eqsf2 + neg r0, r0 + add r0, r0, #1 + pop {r4, pc} + + FUNC_END aeabi_fcmpeq + +.macro COMPARISON cond, helper, mode=sf2 +FUNC_START aeabi_fcmp\cond + + push {r4, lr} + bl __\helper\mode + cmp r0, #0 + b\cond 1f + mov r0, #0 + pop {r4, pc} +1: + mov r0, #1 + pop {r4, pc} + + FUNC_END aeabi_fcmp\cond +.endm + +COMPARISON lt, le +COMPARISON le, le +COMPARISON gt, ge +COMPARISON ge, ge + +#endif /* L_arm_cmpsf2 */ + +#ifdef L_arm_addsubdf3 + +FUNC_START aeabi_drsub + + push {r4, lr} + mov r4, #1 + lsl r4, #31 + eor xxh, xxh, r4 + bl __aeabi_dadd + pop {r4, pc} + + FUNC_END aeabi_drsub + +#endif /* L_arm_addsubdf3 */ + +#ifdef L_arm_cmpdf2 + +FUNC_START aeabi_cdrcmple + + mov ip, r0 + mov r0, r2 + mov r2, ip + mov ip, r1 + mov r1, r3 + mov r3, ip + b 6f + +FUNC_START aeabi_cdcmpeq +FUNC_ALIAS aeabi_cdcmple aeabi_cdcmpeq + + @ The status-returning routines are required to preserve all + @ registers except ip, lr, and cpsr. +6: push {r0, r1, r2, r3, r4, lr} + bl __ledf2 + @ Set the Z flag correctly, and the C flag unconditionally. + cmp r0, #0 + @ Clear the C flag if the return value was -1, indicating + @ that the first operand was smaller than the second. + bmi 1f + mov r1, #0 + cmn r0, r1 +1: + pop {r0, r1, r2, r3, r4, pc} + + FUNC_END aeabi_cdcmple + FUNC_END aeabi_cdcmpeq + FUNC_END aeabi_cdrcmple + +FUNC_START aeabi_dcmpeq + + push {r4, lr} + bl __eqdf2 + neg r0, r0 + add r0, r0, #1 + pop {r4, pc} + + FUNC_END aeabi_dcmpeq + +.macro COMPARISON cond, helper, mode=df2 +FUNC_START aeabi_dcmp\cond + + push {r4, lr} + bl __\helper\mode + cmp r0, #0 + b\cond 1f + mov r0, #0 + pop {r4, pc} +1: + mov r0, #1 + pop {r4, pc} + + FUNC_END aeabi_dcmp\cond +.endm + +COMPARISON lt, le +COMPARISON le, le +COMPARISON gt, ge +COMPARISON ge, ge + +#endif /* L_arm_cmpdf2 */ diff --git a/gcc/config/arm/bpabi.h b/gcc/config/arm/bpabi.h index a67f6498cdd..0f3b24faaf3 100644 --- a/gcc/config/arm/bpabi.h +++ b/gcc/config/arm/bpabi.h @@ -1,5 +1,5 @@ /* Configuration file for ARM BPABI targets. - Copyright (C) 2004, 2005, 2007 + Copyright (C) 2004, 2005, 2007, 2008 Free Software Foundation, Inc. Contributed by CodeSourcery, LLC @@ -99,6 +99,21 @@ #define DECLARE_LIBRARY_RENAMES RENAME_LIBRARY (floatdisf, l2f) #endif +/* These renames are needed on ARMv6M. Other targets get them from + assembly routines. */ +#ifdef L_fixunsdfsi +#define DECLARE_LIBRARY_RENAMES RENAME_LIBRARY (fixunsdfsi, d2uiz) +#endif +#ifdef L_fixunssfsi +#define DECLARE_LIBRARY_RENAMES RENAME_LIBRARY (fixunssfsi, f2uiz) +#endif +#ifdef L_floatundidf +#define DECLARE_LIBRARY_RENAMES RENAME_LIBRARY (floatundidf, ul2d) +#endif +#ifdef L_floatundisf +#define DECLARE_LIBRARY_RENAMES RENAME_LIBRARY (floatundisf, ul2f) +#endif + /* The BPABI requires that we always use an out-of-line implementation of RTTI comparison, even if the target supports weak symbols, because the same object file might be used on a target that does diff --git a/gcc/config/arm/cortex-r4.md b/gcc/config/arm/cortex-r4.md new file mode 100644 index 00000000000..d912f205994 --- /dev/null +++ b/gcc/config/arm/cortex-r4.md @@ -0,0 +1,288 @@ +;; ARM Cortex-R4 scheduling description. +;; Copyright (C) 2007, 2008 Free Software Foundation, Inc. +;; Contributed by CodeSourcery. + +;; This file is part of GCC. 
+ +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING. If not, write to +;; the Free Software Foundation, 51 Franklin Street, Fifth Floor, +;; Boston, MA 02110-1301, USA. + +(define_automaton "cortex_r4") + +;; We approximate the dual-issue constraints of this core using four +;; "issue units" and a reservation matrix as follows. The numbers indicate +;; the instruction groups' preferences in order. Multiple entries for +;; the same numbered preference indicate units that must be reserved +;; together. +;; +;; Issue unit: A B C ALU +;; +;; ALU w/o reg shift 1st 2nd 1st and 2nd +;; ALU w/ reg shift 1st 2nd 2nd 1st and 2nd +;; Moves 1st 2nd 2nd +;; Multiplication 1st 1st +;; Division 1st 1st +;; Load/store single 1st 1st +;; Other load/store 1st 1st +;; Branches 1st + +(define_cpu_unit "cortex_r4_issue_a" "cortex_r4") +(define_cpu_unit "cortex_r4_issue_b" "cortex_r4") +(define_cpu_unit "cortex_r4_issue_c" "cortex_r4") +(define_cpu_unit "cortex_r4_issue_alu" "cortex_r4") + +(define_reservation "cortex_r4_alu" + "(cortex_r4_issue_a+cortex_r4_issue_alu)|\ + (cortex_r4_issue_b+cortex_r4_issue_alu)") +(define_reservation "cortex_r4_alu_shift_reg" + "(cortex_r4_issue_a+cortex_r4_issue_alu)|\ + (cortex_r4_issue_b+cortex_r4_issue_c+\ + cortex_r4_issue_alu)") +(define_reservation "cortex_r4_mov" + "cortex_r4_issue_a|(cortex_r4_issue_b+\ + cortex_r4_issue_alu)") +(define_reservation "cortex_r4_mul" "cortex_r4_issue_a+cortex_r4_issue_alu") +(define_reservation "cortex_r4_mul_2" + "(cortex_r4_issue_a+cortex_r4_issue_alu)*2") +;; Division instructions execute out-of-order with respect to the +;; rest of the pipeline and only require reservations on their first and +;; final cycles. +(define_reservation "cortex_r4_div_9" + "cortex_r4_issue_a+cortex_r4_issue_alu,\ + nothing*7,\ + cortex_r4_issue_a+cortex_r4_issue_alu") +(define_reservation "cortex_r4_div_10" + "cortex_r4_issue_a+cortex_r4_issue_alu,\ + nothing*8,\ + cortex_r4_issue_a+cortex_r4_issue_alu") +(define_reservation "cortex_r4_load_store" + "cortex_r4_issue_a+cortex_r4_issue_c") +(define_reservation "cortex_r4_load_store_2" + "(cortex_r4_issue_a+cortex_r4_issue_b)*2") +(define_reservation "cortex_r4_branch" "cortex_r4_issue_b") + +;; We assume that all instructions are unconditional. + +;; Data processing instructions. Moves without shifts are kept separate +;; for the purposes of the dual-issue constraints above. +(define_insn_reservation "cortex_r4_alu" 2 + (and (eq_attr "tune" "cortexr4") + (and (eq_attr "type" "alu") + (not (eq_attr "insn" "mov")))) + "cortex_r4_alu") + +(define_insn_reservation "cortex_r4_mov" 2 + (and (eq_attr "tune" "cortexr4") + (and (eq_attr "type" "alu") + (eq_attr "insn" "mov"))) + "cortex_r4_mov") + +(define_insn_reservation "cortex_r4_alu_shift" 2 + (and (eq_attr "tune" "cortexr4") + (eq_attr "type" "alu_shift")) + "cortex_r4_alu") + +(define_insn_reservation "cortex_r4_alu_shift_reg" 2 + (and (eq_attr "tune" "cortexr4") + (eq_attr "type" "alu_shift_reg")) + "cortex_r4_alu_shift_reg") + +;; An ALU instruction followed by an ALU instruction with no early dep. 
+(define_bypass 1 "cortex_r4_alu,cortex_r4_alu_shift,cortex_r4_alu_shift_reg,\ + cortex_r4_mov" + "cortex_r4_alu") +(define_bypass 1 "cortex_r4_alu,cortex_r4_alu_shift,cortex_r4_alu_shift_reg,\ + cortex_r4_mov" + "cortex_r4_alu_shift" + "arm_no_early_alu_shift_dep") +(define_bypass 1 "cortex_r4_alu,cortex_r4_alu_shift,cortex_r4_alu_shift_reg,\ + cortex_r4_mov" + "cortex_r4_alu_shift_reg" + "arm_no_early_alu_shift_value_dep") + +;; In terms of availabilities, a consumer mov could theoretically be +;; issued together with a producer ALU instruction, without stalls. +;; In practice this cannot happen because mov;add (in that order) is not +;; eligible for dual issue and furthermore dual issue is not permitted +;; when a dependency is involved. We therefore note it as latency one. +;; A mov followed by another of the same is also latency one. +(define_bypass 1 "cortex_r4_alu,cortex_r4_alu_shift,cortex_r4_alu_shift_reg,\ + cortex_r4_mov" + "cortex_r4_mov") + +;; qadd, qdadd, qsub and qdsub are not currently emitted, and neither are +;; media data processing instructions nor sad instructions. + +;; Multiplication instructions. + +(define_insn_reservation "cortex_r4_mul_4" 4 + (and (eq_attr "tune" "cortexr4") + (eq_attr "insn" "mul,smmul")) + "cortex_r4_mul_2") + +(define_insn_reservation "cortex_r4_mul_3" 3 + (and (eq_attr "tune" "cortexr4") + (eq_attr "insn" "smulxy,smulwy,smuad,smusd")) + "cortex_r4_mul") + +(define_insn_reservation "cortex_r4_mla_4" 4 + (and (eq_attr "tune" "cortexr4") + (eq_attr "insn" "mla,smmla")) + "cortex_r4_mul_2") + +(define_insn_reservation "cortex_r4_mla_3" 3 + (and (eq_attr "tune" "cortexr4") + (eq_attr "insn" "smlaxy,smlawy,smlad,smlsd")) + "cortex_r4_mul") + +(define_insn_reservation "cortex_r4_smlald" 3 + (and (eq_attr "tune" "cortexr4") + (eq_attr "insn" "smlald,smlsld")) + "cortex_r4_mul") + +(define_insn_reservation "cortex_r4_mull" 4 + (and (eq_attr "tune" "cortexr4") + (eq_attr "insn" "smull,umull,umlal,umaal")) + "cortex_r4_mul_2") + +;; A multiply or an MLA with a single-register result, followed by an +;; MLA with an accumulator dependency, has its result forwarded. +(define_bypass 2 "cortex_r4_mul_3,cortex_r4_mla_3" + "cortex_r4_mla_3,cortex_r4_mla_4" + "arm_mac_accumulator_is_mul_result") + +(define_bypass 3 "cortex_r4_mul_4,cortex_r4_mla_4" + "cortex_r4_mla_3,cortex_r4_mla_4" + "arm_mac_accumulator_is_mul_result") + +;; A multiply followed by an ALU instruction needing the multiply +;; result only at ALU has lower latency than one needing it at Shift. +(define_bypass 2 "cortex_r4_mul_3,cortex_r4_mla_3,cortex_r4_smlald" + "cortex_r4_alu") +(define_bypass 2 "cortex_r4_mul_3,cortex_r4_mla_3,cortex_r4_smlald" + "cortex_r4_alu_shift" + "arm_no_early_alu_shift_dep") +(define_bypass 2 "cortex_r4_mul_3,cortex_r4_mla_3,cortex_r4_smlald" + "cortex_r4_alu_shift_reg" + "arm_no_early_alu_shift_value_dep") +(define_bypass 3 "cortex_r4_mul_4,cortex_r4_mla_4,cortex_r4_mull" + "cortex_r4_alu") +(define_bypass 3 "cortex_r4_mul_4,cortex_r4_mla_4,cortex_r4_mull" + "cortex_r4_alu_shift" + "arm_no_early_alu_shift_dep") +(define_bypass 3 "cortex_r4_mul_4,cortex_r4_mla_4,cortex_r4_mull" + "cortex_r4_alu_shift_reg" + "arm_no_early_alu_shift_value_dep") + +;; A multiply followed by a mov has one cycle lower latency again. 
+(define_bypass 1 "cortex_r4_mul_3,cortex_r4_mla_3,cortex_r4_smlald" + "cortex_r4_mov") +(define_bypass 2 "cortex_r4_mul_4,cortex_r4_mla_4,cortex_r4_mull" + "cortex_r4_mov") + +;; We guess that division of A/B using sdiv or udiv, on average, +;; is performed with B having ten more leading zeros than A. +;; This gives a latency of nine for udiv and ten for sdiv. +(define_insn_reservation "cortex_r4_udiv" 9 + (and (eq_attr "tune" "cortexr4") + (eq_attr "insn" "udiv")) + "cortex_r4_div_9") + +(define_insn_reservation "cortex_r4_sdiv" 10 + (and (eq_attr "tune" "cortexr4") + (eq_attr "insn" "sdiv")) + "cortex_r4_div_10") + +;; Branches. We assume correct prediction. + +(define_insn_reservation "cortex_r4_branch" 0 + (and (eq_attr "tune" "cortexr4") + (eq_attr "type" "branch")) + "cortex_r4_branch") + +;; Call latencies are not predictable. A semi-arbitrary very large +;; number is used as "positive infinity" so that everything should be +;; finished by the time of return. +(define_insn_reservation "cortex_r4_call" 32 + (and (eq_attr "tune" "cortexr4") + (eq_attr "type" "call")) + "nothing") + +;; Status register access instructions are not currently emitted. + +;; Load instructions. +;; We do not model the "addr_md_3cycle" cases and assume that +;; accesses following are correctly aligned. + +(define_insn_reservation "cortex_r4_load_1_2" 3 + (and (eq_attr "tune" "cortexr4") + (eq_attr "type" "load1,load2")) + "cortex_r4_load_store") + +(define_insn_reservation "cortex_r4_load_3_4" 4 + (and (eq_attr "tune" "cortexr4") + (eq_attr "type" "load3,load4")) + "cortex_r4_load_store_2") + +;; If a producing load is followed by an instruction consuming only +;; as a Normal Reg, there is one fewer cycle of latency. + +(define_bypass 2 "cortex_r4_load_1_2" + "cortex_r4_alu") +(define_bypass 2 "cortex_r4_load_1_2" + "cortex_r4_alu_shift" + "arm_no_early_alu_shift_dep") +(define_bypass 2 "cortex_r4_load_1_2" + "cortex_r4_alu_shift_reg" + "arm_no_early_alu_shift_value_dep") + +(define_bypass 3 "cortex_r4_load_3_4" + "cortex_r4_alu") +(define_bypass 3 "cortex_r4_load_3_4" + "cortex_r4_alu_shift" + "arm_no_early_alu_shift_dep") +(define_bypass 3 "cortex_r4_load_3_4" + "cortex_r4_alu_shift_reg" + "arm_no_early_alu_shift_value_dep") + +;; If a producing load is followed by an instruction consuming only +;; as a Late Reg, there are two fewer cycles of latency. Such consumer +;; instructions are moves and stores. + +(define_bypass 1 "cortex_r4_load_1_2" + "cortex_r4_mov,cortex_r4_store_1_2,cortex_r4_store_3_4") +(define_bypass 2 "cortex_r4_load_3_4" + "cortex_r4_mov,cortex_r4_store_1_2,cortex_r4_store_3_4") + +;; If a producer's result is required as the base or offset of a load, +;; there is an extra cycle latency. + +(define_bypass 3 "cortex_r4_alu,cortex_r4_mov,cortex_r4_alu_shift,\ + cortex_r4_alu_shift_reg" + "cortex_r4_load_1_2,cortex_r4_load_3_4") + +(define_bypass 4 "cortex_r4_mul_3,cortex_r4_mla_3,cortex_r4_smlald" + "cortex_r4_load_1_2,cortex_r4_load_3_4") + +(define_bypass 5 "cortex_r4_mul_4,cortex_r4_mla_4,cortex_r4_mull" + "cortex_r4_load_1_2,cortex_r4_load_3_4") + +;; Store instructions. 
+ +(define_insn_reservation "cortex_r4_store_1_2" 0 + (and (eq_attr "tune" "cortexr4") + (eq_attr "type" "store1,store2")) + "cortex_r4_load_store") + +(define_insn_reservation "cortex_r4_store_3_4" 0 + (and (eq_attr "tune" "cortexr4") + (eq_attr "type" "store3,store4")) + "cortex_r4_load_store_2") + diff --git a/gcc/config/arm/elf.h b/gcc/config/arm/elf.h index 65bd00fdc3c..37c366d5ac8 100644 --- a/gcc/config/arm/elf.h +++ b/gcc/config/arm/elf.h @@ -1,7 +1,7 @@ /* Definitions of target machine for GNU compiler. For ARM with ELF obj format. - Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2004, 2005, 2007 - Free Software Foundation, Inc. + Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2004, 2005, 2007, + 2008 Free Software Foundation, Inc. Contributed by Philip Blundell <philb@gnu.org> and Catherine Moore <clm@cygnus.com> @@ -145,3 +145,17 @@ } \ while (0) +/* Horrible hack: We want to prevent some libgcc routines being included + for some multilibs. */ +#ifndef __ARM_ARCH_6M__ +#undef L_fixdfsi +#undef L_fixunsdfsi +#undef L_truncdfsf2 +#undef L_fixsfsi +#undef L_fixunssfsi +#undef L_floatdidf +#undef L_floatdisf +#undef L_floatundidf +#undef L_floatundisf +#endif + diff --git a/gcc/config/arm/ieee754-df.S b/gcc/config/arm/ieee754-df.S index ebf7e58dabb..a175fa6bb74 100644 --- a/gcc/config/arm/ieee754-df.S +++ b/gcc/config/arm/ieee754-df.S @@ -1,6 +1,6 @@ /* ieee754-df.S double-precision floating point support for ARM - Copyright (C) 2003, 2004, 2005, 2007 Free Software Foundation, Inc. + Copyright (C) 2003, 2004, 2005, 2007, 2008 Free Software Foundation, Inc. Contributed by Nicolas Pitre (nico@cam.org) This file is free software; you can redistribute it and/or modify it @@ -56,7 +56,7 @@ #endif -#ifdef L_negdf2 +#ifdef L_arm_negdf2 ARM_FUNC_START negdf2 ARM_FUNC_ALIAS aeabi_dneg negdf2 @@ -70,7 +70,7 @@ ARM_FUNC_ALIAS aeabi_dneg negdf2 #endif -#ifdef L_addsubdf3 +#ifdef L_arm_addsubdf3 ARM_FUNC_START aeabi_drsub @@ -603,7 +603,7 @@ LSYM(f0_ret): #endif /* L_addsubdf3 */ -#ifdef L_muldivdf3 +#ifdef L_arm_muldivdf3 ARM_FUNC_START muldf3 ARM_FUNC_ALIAS aeabi_dmul muldf3 @@ -1103,7 +1103,7 @@ LSYM(Ldv_s): #endif /* L_muldivdf3 */ -#ifdef L_cmpdf2 +#ifdef L_arm_cmpdf2 @ Note: only r0 (return value) and ip are clobbered here. @@ -1271,7 +1271,7 @@ ARM_FUNC_START aeabi_dcmpgt #endif /* L_cmpdf2 */ -#ifdef L_unorddf2 +#ifdef L_arm_unorddf2 ARM_FUNC_START unorddf2 ARM_FUNC_ALIAS aeabi_dcmpun unorddf2 @@ -1297,7 +1297,7 @@ ARM_FUNC_ALIAS aeabi_dcmpun unorddf2 #endif /* L_unorddf2 */ -#ifdef L_fixdfsi +#ifdef L_arm_fixdfsi ARM_FUNC_START fixdfsi ARM_FUNC_ALIAS aeabi_d2iz fixdfsi @@ -1339,7 +1339,7 @@ ARM_FUNC_ALIAS aeabi_d2iz fixdfsi #endif /* L_fixdfsi */ -#ifdef L_fixunsdfsi +#ifdef L_arm_fixunsdfsi ARM_FUNC_START fixunsdfsi ARM_FUNC_ALIAS aeabi_d2uiz fixunsdfsi @@ -1377,7 +1377,7 @@ ARM_FUNC_ALIAS aeabi_d2uiz fixunsdfsi #endif /* L_fixunsdfsi */ -#ifdef L_truncdfsf2 +#ifdef L_arm_truncdfsf2 ARM_FUNC_START truncdfsf2 ARM_FUNC_ALIAS aeabi_d2f truncdfsf2 diff --git a/gcc/config/arm/ieee754-sf.S b/gcc/config/arm/ieee754-sf.S index 405487a3855..2da156cd44d 100644 --- a/gcc/config/arm/ieee754-sf.S +++ b/gcc/config/arm/ieee754-sf.S @@ -1,6 +1,6 @@ /* ieee754-sf.S single-precision floating point support for ARM - Copyright (C) 2003, 2004, 2005, 2007 Free Software Foundation, Inc. + Copyright (C) 2003, 2004, 2005, 2007, 2008 Free Software Foundation, Inc. 
Contributed by Nicolas Pitre (nico@cam.org) This file is free software; you can redistribute it and/or modify it @@ -38,7 +38,7 @@ * if necessary without impacting performances. */ -#ifdef L_negsf2 +#ifdef L_arm_negsf2 ARM_FUNC_START negsf2 ARM_FUNC_ALIAS aeabi_fneg negsf2 @@ -51,7 +51,7 @@ ARM_FUNC_ALIAS aeabi_fneg negsf2 #endif -#ifdef L_addsubsf3 +#ifdef L_arm_addsubsf3 ARM_FUNC_START aeabi_frsub @@ -448,7 +448,7 @@ LSYM(f0_ret): #endif /* L_addsubsf3 */ -#ifdef L_muldivsf3 +#ifdef L_arm_muldivsf3 ARM_FUNC_START mulsf3 ARM_FUNC_ALIAS aeabi_fmul mulsf3 @@ -795,7 +795,7 @@ LSYM(Ldv_s): #endif /* L_muldivsf3 */ -#ifdef L_cmpsf2 +#ifdef L_arm_cmpsf2 @ The return value in r0 is @ @@ -958,7 +958,7 @@ ARM_FUNC_START aeabi_fcmpgt #endif /* L_cmpsf2 */ -#ifdef L_unordsf2 +#ifdef L_arm_unordsf2 ARM_FUNC_START unordsf2 ARM_FUNC_ALIAS aeabi_fcmpun unordsf2 @@ -983,7 +983,7 @@ ARM_FUNC_ALIAS aeabi_fcmpun unordsf2 #endif /* L_unordsf2 */ -#ifdef L_fixsfsi +#ifdef L_arm_fixsfsi ARM_FUNC_START fixsfsi ARM_FUNC_ALIAS aeabi_f2iz fixsfsi @@ -1025,7 +1025,7 @@ ARM_FUNC_ALIAS aeabi_f2iz fixsfsi #endif /* L_fixsfsi */ -#ifdef L_fixunssfsi +#ifdef L_arm_fixunssfsi ARM_FUNC_START fixunssfsi ARM_FUNC_ALIAS aeabi_f2uiz fixunssfsi diff --git a/gcc/config/arm/lib1funcs.asm b/gcc/config/arm/lib1funcs.asm index 0c6e440e598..2fc66be80d5 100644 --- a/gcc/config/arm/lib1funcs.asm +++ b/gcc/config/arm/lib1funcs.asm @@ -1,7 +1,7 @@ @ libgcc routines for ARM cpu. @ Division routines, written by Richard Earnshaw, (rearnsha@armltd.co.uk) -/* Copyright 1995, 1996, 1998, 1999, 2000, 2003, 2004, 2005, 2007 +/* Copyright 1995, 1996, 1998, 1999, 2000, 2003, 2004, 2005, 2007, 2008 Free Software Foundation, Inc. This file is free software; you can redistribute it and/or modify it @@ -94,7 +94,8 @@ Boston, MA 02110-1301, USA. */ #if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \ || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) \ - || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) + || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) \ + || defined(__ARM_ARCH_6M__) # define __ARM_ARCH__ 6 #endif @@ -367,6 +368,9 @@ _L__\name: #else /* !(__INTERWORKING_STUBS__ || __thumb2__) */ +#ifdef __ARM_ARCH_6M__ +#define EQUIV .thumb_set +#else .macro ARM_FUNC_START name .text .globl SYM (__\name) @@ -379,6 +383,7 @@ SYM (__\name): .macro ARM_CALL name bl __\name .endm +#endif #endif @@ -391,6 +396,7 @@ SYM (__\name): #endif .endm +#ifndef __ARM_ARCH_6M__ .macro ARM_FUNC_ALIAS new old .globl SYM (__\new) EQUIV SYM (__\new), SYM (__\old) @@ -398,6 +404,7 @@ SYM (__\name): .set SYM (_L__\new), SYM (_L__\old) #endif .endm +#endif #ifdef __thumb__ /* Register aliases. */ @@ -1256,8 +1263,8 @@ LSYM(Lover12): #endif /* L_call_via_rX */ /* Don't bother with the old interworking routines for Thumb-2. */ -/* ??? Maybe only omit these on v7m. */ -#ifndef __thumb2__ +/* ??? Maybe only omit these on "m" variants. */ +#if !defined(__thumb2__) && !defined(__ARM_ARCH_6M__) #if defined L_interwork_call_via_rX @@ -1387,7 +1394,11 @@ LSYM(Lchange_\register): #endif /* Arch supports thumb. 
*/ #ifndef __symbian__ +#ifndef __ARM_ARCH_6M__ #include "ieee754-df.S" #include "ieee754-sf.S" #include "bpabi.S" -#endif /* __symbian__ */ +#else /* __ARM_ARCH_6M__ */ +#include "bpabi-v6m.S" +#endif /* __ARM_ARCH_6M__ */ +#endif /* !__symbian__ */ diff --git a/gcc/config/arm/libunwind.S b/gcc/config/arm/libunwind.S index 0732e9f0af9..dd958548387 100644 --- a/gcc/config/arm/libunwind.S +++ b/gcc/config/arm/libunwind.S @@ -1,5 +1,5 @@ /* Support functions for the unwinder. - Copyright (C) 2003, 2004, 2005, 2007 Free Software Foundation, Inc. + Copyright (C) 2003, 2004, 2005, 2007, 2008 Free Software Foundation, Inc. Contributed by Paul Brook This file is free software; you can redistribute it and/or modify it @@ -53,6 +53,119 @@ #endif #endif +#ifdef __ARM_ARCH_6M__ + +/* r0 points to a 16-word block. Upload these values to the actual core + state. */ +FUNC_START restore_core_regs + mov r1, r0 + add r1, r1, #52 + ldmia r1!, {r3, r4, r5} + sub r3, r3, #4 + mov ip, r3 + str r5, [r3] + mov lr, r4 + /* Restore r8-r11. */ + mov r1, r0 + add r1, r1, #32 + ldmia r1!, {r2, r3, r4, r5} + mov r8, r2 + mov r9, r3 + mov sl, r4 + mov fp, r5 + mov r1, r0 + add r1, r1, #8 + ldmia r1!, {r2, r3, r4, r5, r6, r7} + ldr r1, [r0, #4] + ldr r0, [r0] + mov sp, ip + pop {pc} + FUNC_END restore_core_regs + UNPREFIX restore_core_regs + +/* ARMv6M does not have coprocessors, so these should never be used. */ +FUNC_START gnu_Unwind_Restore_VFP + RET + +/* Store VFP registers d0-d15 to the address in r0. */ +FUNC_START gnu_Unwind_Save_VFP + RET + +/* Load VFP registers d0-d15 from the address in r0. + Use this to load from FSTMD format. */ +FUNC_START gnu_Unwind_Restore_VFP_D + RET + +/* Store VFP registers d0-d15 to the address in r0. + Use this to store in FLDMD format. */ +FUNC_START gnu_Unwind_Save_VFP_D + RET + +/* Load VFP registers d16-d31 from the address in r0. + Use this to load from FSTMD (=VSTM) format. Needs VFPv3. */ +FUNC_START gnu_Unwind_Restore_VFP_D_16_to_31 + RET + +/* Store VFP registers d16-d31 to the address in r0. + Use this to store in FLDMD (=VLDM) format. Needs VFPv3. */ +FUNC_START gnu_Unwind_Save_VFP_D_16_to_31 + RET + +FUNC_START gnu_Unwind_Restore_WMMXD + RET + +FUNC_START gnu_Unwind_Save_WMMXD + RET + +FUNC_START gnu_Unwind_Restore_WMMXC + RET + +FUNC_START gnu_Unwind_Save_WMMXC + RET + +.macro UNWIND_WRAPPER name nargs + FUNC_START \name + /* Create a phase2_vrs structure. */ + /* Save r0 in the PC slot so we can use it as a scratch register. */ + push {r0} + add r0, sp, #4 + push {r0, lr} /* Push original SP and LR. */ + /* Make space for r8-r12. */ + sub sp, sp, #20 + /* Save low registers. */ + push {r0, r1, r2, r3, r4, r5, r6, r7} + /* Save high registers. */ + add r0, sp, #32 + mov r1, r8 + mov r2, r9 + mov r3, sl + mov r4, fp + mov r5, ip + stmia r0!, {r1, r2, r3, r4, r5} + /* Restore original low register values. */ + add r0, sp, #4 + ldmia r0!, {r1, r2, r3, r4, r5} + /* Restore original r0. */ + ldr r0, [sp, #60] + str r0, [sp] + /* Demand-save flags, plus an extra word for alignment. */ + mov r3, #0 + push {r2, r3} + /* Point r1 at the block. Pass r[0..nargs) unchanged. */ + add r\nargs, sp, #4 + + bl SYM (__gnu\name) + + ldr r3, [sp, #64] + add sp, sp, #72 + bx r3 + + FUNC_END \name + UNPREFIX \name +.endm + +#else /* !__ARM_ARCH_6M__ */ + /* r0 points to a 16-word block. Upload these values to the actual core state. 
*/ ARM_FUNC_START restore_core_regs @@ -233,6 +346,8 @@ ARM_FUNC_START gnu_Unwind_Save_WMMXC UNPREFIX \name .endm +#endif /* !__ARM_ARCH_6M__ */ + UNWIND_WRAPPER _Unwind_RaiseException 1 UNWIND_WRAPPER _Unwind_Resume 1 UNWIND_WRAPPER _Unwind_Resume_or_Rethrow 1 diff --git a/gcc/config/arm/sfp-machine.h b/gcc/config/arm/sfp-machine.h new file mode 100644 index 00000000000..bc75737d73d --- /dev/null +++ b/gcc/config/arm/sfp-machine.h @@ -0,0 +1,96 @@ +#define _FP_W_TYPE_SIZE 32 +#define _FP_W_TYPE unsigned long +#define _FP_WS_TYPE signed long +#define _FP_I_TYPE long + +#define _FP_MUL_MEAT_S(R,X,Y) \ + _FP_MUL_MEAT_1_wide(_FP_WFRACBITS_S,R,X,Y,umul_ppmm) +#define _FP_MUL_MEAT_D(R,X,Y) \ + _FP_MUL_MEAT_2_wide(_FP_WFRACBITS_D,R,X,Y,umul_ppmm) +#define _FP_MUL_MEAT_Q(R,X,Y) \ + _FP_MUL_MEAT_4_wide(_FP_WFRACBITS_Q,R,X,Y,umul_ppmm) + +#define _FP_DIV_MEAT_S(R,X,Y) _FP_DIV_MEAT_1_loop(S,R,X,Y) +#define _FP_DIV_MEAT_D(R,X,Y) _FP_DIV_MEAT_2_udiv(D,R,X,Y) +#define _FP_DIV_MEAT_Q(R,X,Y) _FP_DIV_MEAT_4_udiv(Q,R,X,Y) + +#define _FP_NANFRAC_S ((_FP_QNANBIT_S << 1) - 1) +#define _FP_NANFRAC_D ((_FP_QNANBIT_D << 1) - 1), -1 +#define _FP_NANFRAC_Q ((_FP_QNANBIT_Q << 1) - 1), -1, -1, -1 +#define _FP_NANSIGN_S 0 +#define _FP_NANSIGN_D 0 +#define _FP_NANSIGN_Q 0 + +#define _FP_KEEPNANFRACP 1 + +/* Someone please check this. */ +#define _FP_CHOOSENAN(fs, wc, R, X, Y, OP) \ + do { \ + if ((_FP_FRAC_HIGH_RAW_##fs(X) & _FP_QNANBIT_##fs) \ + && !(_FP_FRAC_HIGH_RAW_##fs(Y) & _FP_QNANBIT_##fs)) \ + { \ + R##_s = Y##_s; \ + _FP_FRAC_COPY_##wc(R,Y); \ + } \ + else \ + { \ + R##_s = X##_s; \ + _FP_FRAC_COPY_##wc(R,X); \ + } \ + R##_c = FP_CLS_NAN; \ + } while (0) + +#define __LITTLE_ENDIAN 1234 +#define __BIG_ENDIAN 4321 + +#if defined __ARMEB__ +# define __BYTE_ORDER __BIG_ENDIAN +#else +# define __BYTE_ORDER __LITTLE_ENDIAN +#endif + + +/* Define ALIASNAME as a strong alias for NAME. */ +# define strong_alias(name, aliasname) _strong_alias(name, aliasname) +# define _strong_alias(name, aliasname) \ + extern __typeof (name) aliasname __attribute__ ((alias (#name))); + +#ifdef __ARM_EABI__ +/* Rename functions to their EABI names. */ +/* The comparison functions need wrappers for EABI semantics, so + leave them unmolested. 
*/ +#define __negsf2 __aeabi_fneg +#define __subsf3 __aeabi_fsub +#define __addsf3 __aeabi_fadd +#define __floatunsisf __aeabi_ui2f +#define __floatsisf __aeabi_i2f +#define __floatundisf __aeabi_ul2f +#define __floatdisf __aeabi_l2f +#define __mulsf3 __aeabi_fmul +#define __divsf3 __aeabi_fdiv +#define __unordsf2 __aeabi_fcmpun +#define __fixsfsi __aeabi_f2iz +#define __fixunssfsi __aeabi_f2uiz +#define __fixsfdi __aeabi_f2lz +#define __fixunssfdi __aeabi_f2ulz +#define __floatdisf __aeabi_l2f + +#define __negdf2 __aeabi_dneg +#define __subdf3 __aeabi_dsub +#define __adddf3 __aeabi_dadd +#define __floatunsidf __aeabi_ui2d +#define __floatsidf __aeabi_i2d +#define __extendsfdf2 __aeabi_f2d +#define __truncdfsf2 __aeabi_d2f +#define __floatundidf __aeabi_ul2d +#define __floatdidf __aeabi_l2d +#define __muldf3 __aeabi_dmul +#define __divdf3 __aeabi_ddiv +#define __unorddf2 __aeabi_dcmpun +#define __fixdfsi __aeabi_d2iz +#define __fixunsdfsi __aeabi_d2uiz +#define __fixdfdi __aeabi_d2lz +#define __fixunsdfdi __aeabi_d2ulz +#define __floatdidf __aeabi_l2d + +#endif /* __ARM_EABI__ */ diff --git a/gcc/config/arm/t-arm-elf b/gcc/config/arm/t-arm-elf index b423bbb3597..31ba396b433 100644 --- a/gcc/config/arm/t-arm-elf +++ b/gcc/config/arm/t-arm-elf @@ -1,10 +1,16 @@ LIB1ASMSRC = arm/lib1funcs.asm +# For most CPUs we have an assembly soft-float implementation. +# However this is not true for ARMv6M. Here we want to use the soft-fp C +# implementation. The soft-fp code is only built for ARMv6M. This pulls +# in the asm implementation for other CPUs. LIB1ASMFUNCS = _udivsi3 _divsi3 _umodsi3 _modsi3 _dvmd_tls _bb_init_func \ _call_via_rX _interwork_call_via_rX \ _lshrdi3 _ashrdi3 _ashldi3 \ - _negdf2 _addsubdf3 _muldivdf3 _cmpdf2 _unorddf2 _fixdfsi _fixunsdfsi \ - _truncdfsf2 _negsf2 _addsubsf3 _muldivsf3 _cmpsf2 _unordsf2 \ - _fixsfsi _fixunssfsi _floatdidf _floatdisf _floatundidf _floatundisf + _arm_negdf2 _arm_addsubdf3 _arm_muldivdf3 _arm_cmpdf2 _arm_unorddf2 \ + _arm_fixdfsi _arm_fixunsdfsi \ + _arm_truncdfsf2 _arm_negsf2 _arm_addsubsf3 _arm_muldivsf3 \ + _arm_cmpsf2 _arm_unordsf2 _arm_fixsfsi _arm_fixunssfsi \ + _arm_floatdidf _arm_floatdisf _arm_floatundidf _arm_floatundisf MULTILIB_OPTIONS = marm/mthumb MULTILIB_DIRNAMES = arm thumb diff --git a/gcc/config/arm/t-arm-softfp b/gcc/config/arm/t-arm-softfp new file mode 100644 index 00000000000..4a97747b195 --- /dev/null +++ b/gcc/config/arm/t-arm-softfp @@ -0,0 +1,11 @@ +softfp_float_modes := sf df +softfp_int_modes := si di +softfp_extensions := sfdf +softfp_truncations := dfsf +softfp_machine_header := arm/sfp-machine.h +softfp_exclude_libgcc2 := y +softfp_wrap_start := '\#ifdef __ARM_ARCH_6M__' +softfp_wrap_end := '\#endif' + +# softfp seems to be missing a whole bunch of prototypes. +TARGET_LIBGCC2_CFLAGS += -Wno-missing-prototypes diff --git a/gcc/config/arm/thumb2.md b/gcc/config/arm/thumb2.md index 0efe31f0f6d..ba45c88c2cb 100644 --- a/gcc/config/arm/thumb2.md +++ b/gcc/config/arm/thumb2.md @@ -1,5 +1,5 @@ ;; ARM Thumb-2 Machine Description -;; Copyright (C) 2007 Free Software Foundation, Inc. +;; Copyright (C) 2007, 2008 Free Software Foundation, Inc. ;; Written by CodeSourcery, LLC. ;; ;; This file is part of GCC. 
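The t-arm-softfp fragment above sets softfp_wrap_start and softfp_wrap_end, so every soft-fp C translation unit that libgcc generates is bracketed by the given preprocessor guard: the C routines are compiled only into the ARMv6-M multilib, while other multilibs keep the assembly versions from lib1funcs.asm. A sketch of the shape of one generated unit, with the body elided (illustrative, not copied from a build tree):

#ifdef __ARM_ARCH_6M__		/* emitted from softfp_wrap_start */
/* soft-fp body, e.g. the __addsf3 implementation, built against
   arm/sfp-machine.h above */
#endif				/* emitted from softfp_wrap_end */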
@@ -1131,7 +1131,8 @@ (match_operand:SI 2 "s_register_operand" "r")))] "TARGET_THUMB2 && arm_arch_hwdiv" "sdiv%?\t%0, %1, %2" - [(set_attr "predicable" "yes")] + [(set_attr "predicable" "yes") + (set_attr "insn" "sdiv")] ) (define_insn "udivsi3" @@ -1140,7 +1141,8 @@ (match_operand:SI 2 "s_register_operand" "r")))] "TARGET_THUMB2 && arm_arch_hwdiv" "udiv%?\t%0, %1, %2" - [(set_attr "predicable" "yes")] + [(set_attr "predicable" "yes") + (set_attr "insn" "udiv")] ) (define_insn "*thumb2_cbz" diff --git a/gcc/config/i386/gthr-win32.c b/gcc/config/i386/gthr-win32.c index c87a771ed8f..3810fc0f26c 100644 --- a/gcc/config/i386/gthr-win32.c +++ b/gcc/config/i386/gthr-win32.c @@ -151,6 +151,12 @@ __gthr_win32_mutex_init_function (__gthread_mutex_t *mutex) mutex->sema = CreateSemaphore (NULL, 0, 65535, NULL); } +void +__gthr_win32_mutex_destroy (__gthread_mutex_t *mutex) +{ + CloseHandle ((HANDLE) mutex->sema); +} + int __gthr_win32_mutex_lock (__gthread_mutex_t *mutex) { diff --git a/gcc/config/i386/i386-modes.def b/gcc/config/i386/i386-modes.def index 105d3872b79..a23e8203ff3 100644 --- a/gcc/config/i386/i386-modes.def +++ b/gcc/config/i386/i386-modes.def @@ -17,6 +17,10 @@ You should have received a copy of the GNU General Public License along with GCC; see the file COPYING3. If not see <http://www.gnu.org/licenses/>. */ +/* In 32bit, DI mode uses 32bit registers. Only 4 byte alignment + is needed. */ +ADJUST_ALIGNMENT (DI, (TARGET_64BIT || TARGET_ALIGN_DOUBLE) ? 8 : 4); + /* The x86_64 ABI specifies both XF and TF modes. XFmode is __float80 is IEEE extended; TFmode is __float128 is IEEE quad. */ @@ -75,6 +79,7 @@ VECTOR_MODES (INT, 8); /* V8QI V4HI V2SI */ VECTOR_MODES (INT, 16); /* V16QI V8HI V4SI V2DI */ VECTOR_MODES (FLOAT, 8); /* V4HF V2SF */ VECTOR_MODES (FLOAT, 16); /* V8HF V4SF V2DF */ +VECTOR_MODE (INT, DI, 1); /* V1DI */ VECTOR_MODE (INT, QI, 2); /* V2QI */ VECTOR_MODE (INT, DI, 4); /* V4DI */ VECTOR_MODE (INT, SI, 8); /* V8SI */ diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 5a4456d912b..74b2be23d9b 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -2700,6 +2700,18 @@ override_options (void) target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS; } + /* If stack probes are required, the space used for large function + arguments on the stack must also be probed, so enable + -maccumulate-outgoing-args so this happens in the prologue. */ + if (TARGET_STACK_PROBE + && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)) + { + if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS) + warning (0, "stack probing requires -maccumulate-outgoing-args " + "for correctness"); + target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS; + } + /* For sane SSE instruction set generation we need fcomi instruction. It is safe to enable all CMOVE instructions. 
*/ if (TARGET_SSE) @@ -3826,7 +3838,7 @@ classify_argument (enum machine_mode mode, const_tree type, } /* for V1xx modes, just use the base mode */ - if (VECTOR_MODE_P (mode) + if (VECTOR_MODE_P (mode) && mode != V1DImode && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes) mode = GET_MODE_INNER (mode); @@ -3898,6 +3910,7 @@ classify_argument (enum machine_mode mode, const_tree type, classes[0] = X86_64_SSE_CLASS; classes[1] = X86_64_SSEUP_CLASS; return 2; + case V1DImode: case V2SFmode: case V2SImode: case V4HImode: @@ -4199,6 +4212,7 @@ function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode, case V4HImode: case V2SImode: case V2SFmode: + case V1DImode: if (!type || !AGGREGATE_TYPE_P (type)) { cum->mmx_words += words; @@ -4362,6 +4376,7 @@ function_arg_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode, case V4HImode: case V2SImode: case V2SFmode: + case V1DImode: if (!type || !AGGREGATE_TYPE_P (type)) { if (!TARGET_MMX && !warnedmmx && cum->warn_mmx) @@ -16774,7 +16789,8 @@ ia32_multipass_dfa_lookahead (void) int ix86_constant_alignment (tree exp, int align) { - if (TREE_CODE (exp) == REAL_CST) + if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST + || TREE_CODE (exp) == INTEGER_CST) { if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64) return 64; @@ -17943,11 +17959,11 @@ static const struct builtin_description bdesc_2arg[] = { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, 0 }, { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, 0 }, { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, 0 }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, 0 }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, 0 }, { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, 0 }, { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, 0 }, { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, 0 }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, 0 }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, 0 }, { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, 0 }, { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, 0 }, @@ -17998,25 +18014,6 @@ static const struct builtin_description bdesc_2arg[] = { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, UNKNOWN, 0 }, { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, UNKNOWN, 0 }, - { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, UNKNOWN, 0 }, - { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, UNKNOWN, 0 }, - { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, UNKNOWN, 0 }, - { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, UNKNOWN, 0 }, - { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, UNKNOWN, 0 }, - { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, UNKNOWN, 0 }, - - { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, UNKNOWN, 0 }, - { OPTION_MASK_ISA_MMX, 
CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, UNKNOWN, 0 }, - { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, UNKNOWN, 0 }, - { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, UNKNOWN, 0 }, - { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, UNKNOWN, 0 }, - { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, UNKNOWN, 0 }, - - { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, UNKNOWN, 0 }, - { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, UNKNOWN, 0 }, - { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, UNKNOWN, 0 }, - { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, UNKNOWN, 0 }, - { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, UNKNOWN, 0 }, { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, UNKNOWN, 0 }, @@ -18128,17 +18125,6 @@ static const struct builtin_description bdesc_2arg[] = { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, UNKNOWN, 0 }, { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, UNKNOWN, 0 }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, UNKNOWN, 0 }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, UNKNOWN, 0 }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, UNKNOWN, 0 }, - - { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, UNKNOWN, 0 }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, UNKNOWN, 0 }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, UNKNOWN, 0 }, - - { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, UNKNOWN, 0 }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, UNKNOWN, 0 }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, UNKNOWN, 0 }, { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, UNKNOWN, 0 }, @@ -18200,6 +18186,7 @@ static const struct builtin_description bdesc_2arg[] = static const struct builtin_description bdesc_1arg[] = { + /* SSE */ { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, UNKNOWN, 0 }, { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, UNKNOWN, 0 }, @@ -18216,6 +18203,7 @@ static const struct builtin_description bdesc_1arg[] = { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, UNKNOWN, 0 }, { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, 0 }, + /* SSE2 */ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, UNKNOWN, 0 }, { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, UNKNOWN, 0 }, @@ -18573,6 +18561,8 @@ ix86_init_mmx_sse_builtins (void) tree V16QI_type_node = build_vector_type_for_mode (char_type_node, V16QImode); tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode); + tree V1DI_type_node + = build_vector_type_for_mode (long_long_integer_type_node, V1DImode); tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode); tree V2DI_type_node = build_vector_type_for_mode (long_long_integer_type_node, V2DImode); @@ -18637,14 +18627,13 @@ ix86_init_mmx_sse_builtins (void) tree v4hi_ftype_v4hi_int = build_function_type_list (V4HI_type_node, V4HI_type_node, integer_type_node, 
NULL_TREE); - tree v4hi_ftype_v4hi_di - = build_function_type_list (V4HI_type_node, - V4HI_type_node, long_long_unsigned_type_node, - NULL_TREE); - tree v2si_ftype_v2si_di + tree v2si_ftype_v2si_int = build_function_type_list (V2SI_type_node, - V2SI_type_node, long_long_unsigned_type_node, - NULL_TREE); + V2SI_type_node, integer_type_node, NULL_TREE); + tree v1di_ftype_v1di_int + = build_function_type_list (V1DI_type_node, + V1DI_type_node, integer_type_node, NULL_TREE); + tree void_ftype_void = build_function_type (void_type_node, void_list_node); tree void_ftype_unsigned @@ -18711,10 +18700,9 @@ ix86_init_mmx_sse_builtins (void) tree v2si_ftype_v2si_v2si = build_function_type_list (V2SI_type_node, V2SI_type_node, V2SI_type_node, NULL_TREE); - tree di_ftype_di_di - = build_function_type_list (long_long_unsigned_type_node, - long_long_unsigned_type_node, - long_long_unsigned_type_node, NULL_TREE); + tree v1di_ftype_v1di_v1di + = build_function_type_list (V1DI_type_node, + V1DI_type_node, V1DI_type_node, NULL_TREE); tree di_ftype_di_di_int = build_function_type_list (long_long_unsigned_type_node, @@ -19170,8 +19158,8 @@ ix86_init_mmx_sse_builtins (void) case V2SImode: type = v2si_ftype_v2si_v2si; break; - case DImode: - type = di_ftype_di_di; + case V1DImode: + type = v1di_ftype_v1di_v1di; break; default: @@ -19263,16 +19251,25 @@ ix86_init_mmx_sse_builtins (void) /* Add the remaining MMX insns with somewhat more complicated types. */ def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS); - def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW); - def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD); - def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ); - def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW); - def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD); - def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ); - - def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW); - def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD); + def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psllwi", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSLLWI); + def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_pslldi", v2si_ftype_v2si_int, IX86_BUILTIN_PSLLDI); + def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psllqi", v1di_ftype_v1di_int, IX86_BUILTIN_PSLLQI); + def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PSLLW); + def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_v2si, IX86_BUILTIN_PSLLD); + def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psllq", v1di_ftype_v1di_v1di, IX86_BUILTIN_PSLLQ); + + def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrlwi", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSRLWI); + def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrldi", v2si_ftype_v2si_int, IX86_BUILTIN_PSRLDI); + def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrlqi", v1di_ftype_v1di_int, IX86_BUILTIN_PSRLQI); + def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PSRLW); + def_builtin_const (OPTION_MASK_ISA_MMX, 
"__builtin_ia32_psrld", v2si_ftype_v2si_v2si, IX86_BUILTIN_PSRLD); + def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrlq", v1di_ftype_v1di_v1di, IX86_BUILTIN_PSRLQ); + + def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrawi", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSRAWI); + def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psradi", v2si_ftype_v2si_int, IX86_BUILTIN_PSRADI); + def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PSRAW); + def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_v2si, IX86_BUILTIN_PSRAD); def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW); def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD); @@ -20817,6 +20814,39 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, emit_insn (pat); return target; + case IX86_BUILTIN_PSLLW: + case IX86_BUILTIN_PSLLWI: + icode = CODE_FOR_mmx_ashlv4hi3; + goto do_pshift; + case IX86_BUILTIN_PSLLD: + case IX86_BUILTIN_PSLLDI: + icode = CODE_FOR_mmx_ashlv2si3; + goto do_pshift; + case IX86_BUILTIN_PSLLQ: + case IX86_BUILTIN_PSLLQI: + icode = CODE_FOR_mmx_ashlv1di3; + goto do_pshift; + case IX86_BUILTIN_PSRAW: + case IX86_BUILTIN_PSRAWI: + icode = CODE_FOR_mmx_ashrv4hi3; + goto do_pshift; + case IX86_BUILTIN_PSRAD: + case IX86_BUILTIN_PSRADI: + icode = CODE_FOR_mmx_ashrv2si3; + goto do_pshift; + case IX86_BUILTIN_PSRLW: + case IX86_BUILTIN_PSRLWI: + icode = CODE_FOR_mmx_lshrv4hi3; + goto do_pshift; + case IX86_BUILTIN_PSRLD: + case IX86_BUILTIN_PSRLDI: + icode = CODE_FOR_mmx_lshrv2si3; + goto do_pshift; + case IX86_BUILTIN_PSRLQ: + case IX86_BUILTIN_PSRLQI: + icode = CODE_FOR_mmx_lshrv1di3; + goto do_pshift; + case IX86_BUILTIN_PSLLW128: case IX86_BUILTIN_PSLLWI128: icode = CODE_FOR_ashlv8hi3; diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index f2429846691..98cb72ac921 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -1120,8 +1120,9 @@ do { \ ((MODE) == V2SFmode || (MODE) == SFmode) #define VALID_MMX_REG_MODE(MODE) \ - ((MODE) == DImode || (MODE) == V8QImode || (MODE) == V4HImode \ - || (MODE) == V2SImode || (MODE) == SImode) + ((MODE == V1DImode) || (MODE) == DImode \ + || (MODE) == V2SImode || (MODE) == SImode \ + || (MODE) == V4HImode || (MODE) == V8QImode) /* ??? No autovectorization into MMX or 3DNOW until we can reliably place emms and femms instructions. 
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 92a37280f5f..eb942d60d9c 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -95,7 +95,6 @@ (UNSPEC_RCP 45) (UNSPEC_RSQRT 46) (UNSPEC_SFENCE 47) - (UNSPEC_NOP 48) ; prevents combiner cleverness (UNSPEC_PFRCP 49) (UNSPEC_PFRCPIT1 40) (UNSPEC_PFRCPIT2 41) @@ -19873,7 +19872,7 @@ (set (reg:SI SP_REG) (minus:SI (reg:SI SP_REG) (match_dup 0))) (clobber (reg:CC FLAGS_REG))] "!TARGET_64BIT && TARGET_STACK_PROBE" - "call\t__alloca" + "call\t___chkstk" [(set_attr "type" "multi") (set_attr "length" "5")]) diff --git a/gcc/config/i386/mmintrin.h b/gcc/config/i386/mmintrin.h index a04109be616..94800ad688e 100644 --- a/gcc/config/i386/mmintrin.h +++ b/gcc/config/i386/mmintrin.h @@ -42,6 +42,7 @@ typedef int __m64 __attribute__ ((__vector_size__ (8), __may_alias__)); typedef int __v2si __attribute__ ((__vector_size__ (8))); typedef short __v4hi __attribute__ ((__vector_size__ (8))); typedef char __v8qi __attribute__ ((__vector_size__ (8))); +typedef long long __v1di __attribute__ ((__vector_size__ (8))); /* Empty the multimedia state. */ static __inline void __attribute__((__always_inline__, __artificial__)) @@ -309,7 +310,7 @@ _m_paddd (__m64 __m1, __m64 __m2) static __inline __m64 __attribute__((__always_inline__, __artificial__)) _mm_add_si64 (__m64 __m1, __m64 __m2) { - return (__m64) __builtin_ia32_paddq ((long long)__m1, (long long)__m2); + return (__m64) __builtin_ia32_paddq ((__v1di)__m1, (__v1di)__m2); } #endif @@ -413,7 +414,7 @@ _m_psubd (__m64 __m1, __m64 __m2) static __inline __m64 __attribute__((__always_inline__, __artificial__)) _mm_sub_si64 (__m64 __m1, __m64 __m2) { - return (__m64) __builtin_ia32_psubq ((long long)__m1, (long long)__m2); + return (__m64) __builtin_ia32_psubq ((__v1di)__m1, (__v1di)__m2); } #endif @@ -520,7 +521,7 @@ _m_pmullw (__m64 __m1, __m64 __m2) static __inline __m64 __attribute__((__always_inline__, __artificial__)) _mm_sll_pi16 (__m64 __m, __m64 __count) { - return (__m64) __builtin_ia32_psllw ((__v4hi)__m, (long long)__count); + return (__m64) __builtin_ia32_psllw ((__v4hi)__m, (__v4hi)__count); } static __inline __m64 __attribute__((__always_inline__, __artificial__)) @@ -532,7 +533,7 @@ _m_psllw (__m64 __m, __m64 __count) static __inline __m64 __attribute__((__always_inline__, __artificial__)) _mm_slli_pi16 (__m64 __m, int __count) { - return (__m64) __builtin_ia32_psllw ((__v4hi)__m, __count); + return (__m64) __builtin_ia32_psllwi ((__v4hi)__m, __count); } static __inline __m64 __attribute__((__always_inline__, __artificial__)) @@ -545,7 +546,7 @@ _m_psllwi (__m64 __m, int __count) static __inline __m64 __attribute__((__always_inline__, __artificial__)) _mm_sll_pi32 (__m64 __m, __m64 __count) { - return (__m64) __builtin_ia32_pslld ((__v2si)__m, (long long)__count); + return (__m64) __builtin_ia32_pslld ((__v2si)__m, (__v2si)__count); } static __inline __m64 __attribute__((__always_inline__, __artificial__)) @@ -557,7 +558,7 @@ _m_pslld (__m64 __m, __m64 __count) static __inline __m64 __attribute__((__always_inline__, __artificial__)) _mm_slli_pi32 (__m64 __m, int __count) { - return (__m64) __builtin_ia32_pslld ((__v2si)__m, __count); + return (__m64) __builtin_ia32_pslldi ((__v2si)__m, __count); } static __inline __m64 __attribute__((__always_inline__, __artificial__)) @@ -570,7 +571,7 @@ _m_pslldi (__m64 __m, int __count) static __inline __m64 __attribute__((__always_inline__, __artificial__)) _mm_sll_si64 (__m64 __m, __m64 __count) { - return (__m64)
__builtin_ia32_psllq ((long long)__m, (long long)__count); + return (__m64) __builtin_ia32_psllq ((__v1di)__m, (__v1di)__count); } static __inline __m64 __attribute__((__always_inline__, __artificial__)) @@ -582,7 +583,7 @@ _m_psllq (__m64 __m, __m64 __count) static __inline __m64 __attribute__((__always_inline__, __artificial__)) _mm_slli_si64 (__m64 __m, int __count) { - return (__m64) __builtin_ia32_psllq ((long long)__m, (long long)__count); + return (__m64) __builtin_ia32_psllqi ((__v1di)__m, __count); } static __inline __m64 __attribute__((__always_inline__, __artificial__)) @@ -595,7 +596,7 @@ _m_psllqi (__m64 __m, int __count) static __inline __m64 __attribute__((__always_inline__, __artificial__)) _mm_sra_pi16 (__m64 __m, __m64 __count) { - return (__m64) __builtin_ia32_psraw ((__v4hi)__m, (long long)__count); + return (__m64) __builtin_ia32_psraw ((__v4hi)__m, (__v4hi)__count); } static __inline __m64 __attribute__((__always_inline__, __artificial__)) @@ -607,7 +608,7 @@ _m_psraw (__m64 __m, __m64 __count) static __inline __m64 __attribute__((__always_inline__, __artificial__)) _mm_srai_pi16 (__m64 __m, int __count) { - return (__m64) __builtin_ia32_psraw ((__v4hi)__m, __count); + return (__m64) __builtin_ia32_psrawi ((__v4hi)__m, __count); } static __inline __m64 __attribute__((__always_inline__, __artificial__)) @@ -620,7 +621,7 @@ _m_psrawi (__m64 __m, int __count) static __inline __m64 __attribute__((__always_inline__, __artificial__)) _mm_sra_pi32 (__m64 __m, __m64 __count) { - return (__m64) __builtin_ia32_psrad ((__v2si)__m, (long long)__count); + return (__m64) __builtin_ia32_psrad ((__v2si)__m, (__v2si)__count); } static __inline __m64 __attribute__((__always_inline__, __artificial__)) @@ -632,7 +633,7 @@ _m_psrad (__m64 __m, __m64 __count) static __inline __m64 __attribute__((__always_inline__, __artificial__)) _mm_srai_pi32 (__m64 __m, int __count) { - return (__m64) __builtin_ia32_psrad ((__v2si)__m, __count); + return (__m64) __builtin_ia32_psradi ((__v2si)__m, __count); } static __inline __m64 __attribute__((__always_inline__, __artificial__)) @@ -645,7 +646,7 @@ _m_psradi (__m64 __m, int __count) static __inline __m64 __attribute__((__always_inline__, __artificial__)) _mm_srl_pi16 (__m64 __m, __m64 __count) { - return (__m64) __builtin_ia32_psrlw ((__v4hi)__m, (long long)__count); + return (__m64) __builtin_ia32_psrlw ((__v4hi)__m, (__v4hi)__count); } static __inline __m64 __attribute__((__always_inline__, __artificial__)) @@ -657,7 +658,7 @@ _m_psrlw (__m64 __m, __m64 __count) static __inline __m64 __attribute__((__always_inline__, __artificial__)) _mm_srli_pi16 (__m64 __m, int __count) { - return (__m64) __builtin_ia32_psrlw ((__v4hi)__m, __count); + return (__m64) __builtin_ia32_psrlwi ((__v4hi)__m, __count); } static __inline __m64 __attribute__((__always_inline__, __artificial__)) @@ -670,7 +671,7 @@ _m_psrlwi (__m64 __m, int __count) static __inline __m64 __attribute__((__always_inline__, __artificial__)) _mm_srl_pi32 (__m64 __m, __m64 __count) { - return (__m64) __builtin_ia32_psrld ((__v2si)__m, (long long)__count); + return (__m64) __builtin_ia32_psrld ((__v2si)__m, (__v2si)__count); } static __inline __m64 __attribute__((__always_inline__, __artificial__)) @@ -682,7 +683,7 @@ _m_psrld (__m64 __m, __m64 __count) static __inline __m64 __attribute__((__always_inline__, __artificial__)) _mm_srli_pi32 (__m64 __m, int __count) { - return (__m64) __builtin_ia32_psrld ((__v2si)__m, __count); + return (__m64) __builtin_ia32_psrldi ((__v2si)__m, __count); } static 
__inline __m64 __attribute__((__always_inline__, __artificial__)) @@ -695,7 +696,7 @@ _m_psrldi (__m64 __m, int __count) static __inline __m64 __attribute__((__always_inline__, __artificial__)) _mm_srl_si64 (__m64 __m, __m64 __count) { - return (__m64) __builtin_ia32_psrlq ((long long)__m, (long long)__count); + return (__m64) __builtin_ia32_psrlq ((__v1di)__m, (__v1di)__count); } static __inline __m64 __attribute__((__always_inline__, __artificial__)) @@ -707,7 +708,7 @@ _m_psrlq (__m64 __m, __m64 __count) static __inline __m64 __attribute__((__always_inline__, __artificial__)) _mm_srli_si64 (__m64 __m, int __count) { - return (__m64) __builtin_ia32_psrlq ((long long)__m, (long long)__count); + return (__m64) __builtin_ia32_psrlqi ((__v1di)__m, __count); } static __inline __m64 __attribute__((__always_inline__, __artificial__)) diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md index 3371161f82f..1d2a41dd114 100644 --- a/gcc/config/i386/mmx.md +++ b/gcc/config/i386/mmx.md @@ -32,16 +32,18 @@ ;; 8 byte integral modes handled by MMX (and by extension, SSE) (define_mode_iterator MMXMODEI [V8QI V4HI V2SI]) +(define_mode_iterator MMXMODEI8 [V8QI V4HI V2SI V1DI]) ;; All 8-byte vector modes handled by MMX -(define_mode_iterator MMXMODE [V8QI V4HI V2SI V2SF]) +(define_mode_iterator MMXMODE [V8QI V4HI V2SI V1DI V2SF]) ;; Mix-n-match (define_mode_iterator MMXMODE12 [V8QI V4HI]) (define_mode_iterator MMXMODE24 [V4HI V2SI]) +(define_mode_iterator MMXMODE248 [V4HI V2SI V1DI]) ;; Mapping from integer vector mode to mnemonic suffix -(define_mode_attr mmxvecsize [(V8QI "b") (V4HI "w") (V2SI "d") (DI "q")]) +(define_mode_attr mmxvecsize [(V8QI "b") (V4HI "w") (V2SI "d") (V1DI "q")]) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; @@ -53,8 +55,8 @@ ;; This is essential for maintaining stable calling conventions. 
(define_expand "mov<mode>" - [(set (match_operand:MMXMODEI 0 "nonimmediate_operand" "") - (match_operand:MMXMODEI 1 "nonimmediate_operand" ""))] + [(set (match_operand:MMXMODEI8 0 "nonimmediate_operand" "") + (match_operand:MMXMODEI8 1 "nonimmediate_operand" ""))] "TARGET_MMX" { ix86_expand_vector_move (<MODE>mode, operands); @@ -62,9 +64,9 @@ }) (define_insn "*mov<mode>_internal_rex64" - [(set (match_operand:MMXMODEI 0 "nonimmediate_operand" + [(set (match_operand:MMXMODEI8 0 "nonimmediate_operand" "=rm,r,!y,!y ,m ,!y,Y2,x,x ,m,r,x") - (match_operand:MMXMODEI 1 "vector_move_operand" + (match_operand:MMXMODEI8 1 "vector_move_operand" "Cr ,m,C ,!ym,!y,Y2,!y,C,xm,x,x,r"))] "TARGET_64BIT && TARGET_MMX && !(MEM_P (operands[0]) && MEM_P (operands[1]))" @@ -86,9 +88,9 @@ (set_attr "mode" "DI")]) (define_insn "*mov<mode>_internal" - [(set (match_operand:MMXMODEI 0 "nonimmediate_operand" + [(set (match_operand:MMXMODEI8 0 "nonimmediate_operand" "=!y,!y ,m ,!y ,*Y2,*Y2,*Y2 ,m ,*x,*x,*x,m ,?r ,?m") - (match_operand:MMXMODEI 1 "vector_move_operand" + (match_operand:MMXMODEI8 1 "vector_move_operand" "C ,!ym,!y,*Y2,!y ,C ,*Y2m,*Y2,C ,*x,m ,*x,irm,r"))] "TARGET_MMX && !(MEM_P (operands[0]) && MEM_P (operands[1]))" @@ -557,26 +559,16 @@ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (define_insn "mmx_add<mode>3" - [(set (match_operand:MMXMODEI 0 "register_operand" "=y") - (plus:MMXMODEI - (match_operand:MMXMODEI 1 "nonimmediate_operand" "%0") - (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")))] - "TARGET_MMX && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)" + [(set (match_operand:MMXMODEI8 0 "register_operand" "=y") + (plus:MMXMODEI8 + (match_operand:MMXMODEI8 1 "nonimmediate_operand" "%0") + (match_operand:MMXMODEI8 2 "nonimmediate_operand" "ym")))] + "(TARGET_MMX || (TARGET_SSE2 && <MODE>mode == V1DImode)) + && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)" "padd<mmxvecsize>\t{%2, %0|%0, %2}" [(set_attr "type" "mmxadd") (set_attr "mode" "DI")]) -(define_insn "mmx_adddi3" - [(set (match_operand:DI 0 "register_operand" "=y") - (unspec:DI - [(plus:DI (match_operand:DI 1 "nonimmediate_operand" "%0") - (match_operand:DI 2 "nonimmediate_operand" "ym"))] - UNSPEC_NOP))] - "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, DImode, operands)" - "paddq\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxadd") - (set_attr "mode" "DI")]) - (define_insn "mmx_ssadd<mode>3" [(set (match_operand:MMXMODE12 0 "register_operand" "=y") (ss_plus:MMXMODE12 @@ -598,26 +590,15 @@ (set_attr "mode" "DI")]) (define_insn "mmx_sub<mode>3" - [(set (match_operand:MMXMODEI 0 "register_operand" "=y") - (minus:MMXMODEI - (match_operand:MMXMODEI 1 "register_operand" "0") - (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")))] - "TARGET_MMX" + [(set (match_operand:MMXMODEI8 0 "register_operand" "=y") + (minus:MMXMODEI8 + (match_operand:MMXMODEI8 1 "register_operand" "0") + (match_operand:MMXMODEI8 2 "nonimmediate_operand" "ym")))] + "(TARGET_MMX || (TARGET_SSE2 && <MODE>mode == V1DImode))" "psub<mmxvecsize>\t{%2, %0|%0, %2}" [(set_attr "type" "mmxadd") (set_attr "mode" "DI")]) -(define_insn "mmx_subdi3" - [(set (match_operand:DI 0 "register_operand" "=y") - (unspec:DI - [(minus:DI (match_operand:DI 1 "register_operand" "0") - (match_operand:DI 2 "nonimmediate_operand" "ym"))] - UNSPEC_NOP))] - "TARGET_SSE2" - "psubq\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxadd") - (set_attr "mode" "DI")]) - (define_insn "mmx_sssub<mode>3" [(set (match_operand:MMXMODE12 0 "register_operand" "=y") (ss_minus:MMXMODE12 @@ 
-778,54 +759,32 @@ [(set (match_operand:MMXMODE24 0 "register_operand" "=y") (ashiftrt:MMXMODE24 (match_operand:MMXMODE24 1 "register_operand" "0") - (match_operand:DI 2 "nonmemory_operand" "yi")))] + (match_operand:SI 2 "nonmemory_operand" "yN")))] "TARGET_MMX" "psra<mmxvecsize>\t{%2, %0|%0, %2}" [(set_attr "type" "mmxshft") (set_attr "mode" "DI")]) (define_insn "mmx_lshr<mode>3" - [(set (match_operand:MMXMODE24 0 "register_operand" "=y") - (lshiftrt:MMXMODE24 - (match_operand:MMXMODE24 1 "register_operand" "0") - (match_operand:DI 2 "nonmemory_operand" "yi")))] + [(set (match_operand:MMXMODE248 0 "register_operand" "=y") + (lshiftrt:MMXMODE248 + (match_operand:MMXMODE248 1 "register_operand" "0") + (match_operand:SI 2 "nonmemory_operand" "yN")))] "TARGET_MMX" "psrl<mmxvecsize>\t{%2, %0|%0, %2}" [(set_attr "type" "mmxshft") (set_attr "mode" "DI")]) -(define_insn "mmx_lshrdi3" - [(set (match_operand:DI 0 "register_operand" "=y") - (unspec:DI - [(lshiftrt:DI (match_operand:DI 1 "register_operand" "0") - (match_operand:DI 2 "nonmemory_operand" "yi"))] - UNSPEC_NOP))] - "TARGET_MMX" - "psrlq\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxshft") - (set_attr "mode" "DI")]) - (define_insn "mmx_ashl<mode>3" - [(set (match_operand:MMXMODE24 0 "register_operand" "=y") - (ashift:MMXMODE24 - (match_operand:MMXMODE24 1 "register_operand" "0") - (match_operand:DI 2 "nonmemory_operand" "yi")))] + [(set (match_operand:MMXMODE248 0 "register_operand" "=y") + (ashift:MMXMODE248 + (match_operand:MMXMODE248 1 "register_operand" "0") + (match_operand:SI 2 "nonmemory_operand" "yN")))] "TARGET_MMX" "psll<mmxvecsize>\t{%2, %0|%0, %2}" [(set_attr "type" "mmxshft") (set_attr "mode" "DI")]) -(define_insn "mmx_ashldi3" - [(set (match_operand:DI 0 "register_operand" "=y") - (unspec:DI - [(ashift:DI (match_operand:DI 1 "register_operand" "0") - (match_operand:DI 2 "nonmemory_operand" "yi"))] - UNSPEC_NOP))] - "TARGET_MMX" - "psllq\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxshft") - (set_attr "mode" "DI")]) - ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; ;; Parallel integral comparisons diff --git a/gcc/config/i386/smmintrin.h b/gcc/config/i386/smmintrin.h index e5b0ae04c0f..08bfd107232 100644 --- a/gcc/config/i386/smmintrin.h +++ b/gcc/config/i386/smmintrin.h @@ -470,30 +470,30 @@ _mm_stream_load_si128 (__m128i *__X) #ifdef __SSE4_2__ /* These macros specify the source data format. */ -#define SIDD_UBYTE_OPS 0x00 -#define SIDD_UWORD_OPS 0x01 -#define SIDD_SBYTE_OPS 0x02 -#define SIDD_SWORD_OPS 0x03 +#define _SIDD_UBYTE_OPS 0x00 +#define _SIDD_UWORD_OPS 0x01 +#define _SIDD_SBYTE_OPS 0x02 +#define _SIDD_SWORD_OPS 0x03 /* These macros specify the comparison operation. */ -#define SIDD_CMP_EQUAL_ANY 0x00 -#define SIDD_CMP_RANGES 0x04 -#define SIDD_CMP_EQUAL_EACH 0x08 -#define SIDD_CMP_EQUAL_ORDERED 0x0c +#define _SIDD_CMP_EQUAL_ANY 0x00 +#define _SIDD_CMP_RANGES 0x04 +#define _SIDD_CMP_EQUAL_EACH 0x08 +#define _SIDD_CMP_EQUAL_ORDERED 0x0c /* These macros specify the polarity. */ -#define SIDD_POSITIVE_POLARITY 0x00 -#define SIDD_NEGATIVE_POLARITY 0x10 -#define SIDD_MASKED_POSITIVE_POLARITY 0x20 -#define SIDD_MASKED_NEGATIVE_POLARITY 0x30 +#define _SIDD_POSITIVE_POLARITY 0x00 +#define _SIDD_NEGATIVE_POLARITY 0x10 +#define _SIDD_MASKED_POSITIVE_POLARITY 0x20 +#define _SIDD_MASKED_NEGATIVE_POLARITY 0x30 /* These macros specify the output selection in _mm_cmpXstri ().
*/ -#define SIDD_LEAST_SIGNIFICANT 0x00 -#define SIDD_MOST_SIGNIFICANT 0x40 +#define _SIDD_LEAST_SIGNIFICANT 0x00 +#define _SIDD_MOST_SIGNIFICANT 0x40 /* These macros specify the output selection in _mm_cmpXstrm (). */ -#define SIDD_BIT_MASK 0x00 -#define SIDD_UNIT_MASK 0x40 +#define _SIDD_BIT_MASK 0x00 +#define _SIDD_UNIT_MASK 0x40 /* Intrinsics for text/string processing. */ diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 97250dbd2ed..8ea5bc0048b 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -5969,7 +5969,7 @@ (mult:V8HI (zero_extend:V8HI (vec_select:V4QI - (match_operand:V16QI 1 "nonimmediate_operand" "%0") + (match_operand:V16QI 1 "nonimmediate_operand" "0") (parallel [(const_int 0) (const_int 2) (const_int 4) @@ -6023,7 +6023,7 @@ (mult:V4HI (zero_extend:V4HI (vec_select:V4QI - (match_operand:V8QI 1 "nonimmediate_operand" "%0") + (match_operand:V8QI 1 "nonimmediate_operand" "0") (parallel [(const_int 0) (const_int 2) (const_int 4) diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md index 2261a9369f3..95c588ca070 100644 --- a/gcc/config/rs6000/altivec.md +++ b/gcc/config/rs6000/altivec.md @@ -64,7 +64,6 @@ (UNSPEC_VPKUWUS 102) (UNSPEC_VPKSWUS 103) (UNSPEC_VRL 104) - (UNSPEC_VSL 107) (UNSPEC_VSLV4SI 110) (UNSPEC_VSLO 111) (UNSPEC_VSR 118) @@ -576,7 +575,7 @@ /* Generate [-0.0, -0.0, -0.0, -0.0]. */ neg0 = gen_reg_rtx (V4SImode); emit_insn (gen_altivec_vspltisw (neg0, constm1_rtx)); - emit_insn (gen_altivec_vslw (neg0, neg0, neg0)); + emit_insn (gen_ashlv4si3 (neg0, neg0, neg0)); /* Use the multiply-add. */ emit_insn (gen_altivec_vmaddfp (operands[0], operands[1], operands[2], @@ -635,7 +634,7 @@ high_product = gen_reg_rtx (V4SImode); emit_insn (gen_altivec_vmsumuhm (high_product, one, small_swap, zero)); - emit_insn (gen_altivec_vslw (high_product, high_product, sixteen)); + emit_insn (gen_ashlv4si3 (high_product, high_product, sixteen)); emit_insn (gen_addv4si3 (operands[0], high_product, low_product)); @@ -1221,15 +1220,6 @@ "vrl<VI_char> %0,%1,%2" [(set_attr "type" "vecsimple")]) -(define_insn "altivec_vsl<VI_char>" - [(set (match_operand:VI 0 "register_operand" "=v") - (unspec:VI [(match_operand:VI 1 "register_operand" "v") - (match_operand:VI 2 "register_operand" "v")] - UNSPEC_VSL))] - "TARGET_ALTIVEC" - "vsl<VI_char> %0,%1,%2" - [(set_attr "type" "vecsimple")]) - (define_insn "altivec_vsl" [(set (match_operand:V4SI 0 "register_operand" "=v") (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v") @@ -1248,6 +1238,14 @@ "vslo %0,%1,%2" [(set_attr "type" "vecperm")]) +(define_insn "ashl<mode>3" + [(set (match_operand:VI 0 "register_operand" "=v") + (ashift:VI (match_operand:VI 1 "register_operand" "v") + (match_operand:VI 2 "register_operand" "v") ))] + "TARGET_ALTIVEC" + "vsl<VI_char> %0,%1,%2" + [(set_attr "type" "vecsimple")]) + (define_insn "lshr<mode>3" [(set (match_operand:VI 0 "register_operand" "=v") (lshiftrt:VI (match_operand:VI 1 "register_operand" "v") @@ -2039,7 +2037,7 @@ [(set (match_dup 2) (vec_duplicate:V4SI (const_int -1))) (set (match_dup 3) - (unspec:V4SI [(match_dup 2) (match_dup 2)] UNSPEC_VSL)) + (ashift:V4SI (match_dup 2) (match_dup 2))) (set (match_operand:V4SF 0 "register_operand" "=v") (and:V4SF (not:V4SF (subreg:V4SF (match_dup 3) 0)) (match_operand:V4SF 1 "register_operand" "v")))] @@ -2642,7 +2640,7 @@ /* Generate [-0.0, -0.0, -0.0, -0.0]. 
*/ neg0 = gen_reg_rtx (V4SImode); emit_insn (gen_altivec_vspltisw (neg0, constm1_rtx)); - emit_insn (gen_altivec_vslw (neg0, neg0, neg0)); + emit_insn (gen_ashlv4si3 (neg0, neg0, neg0)); /* XOR */ emit_insn (gen_xorv4sf3 (operands[0], diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 30f226bf46e..593408f56d5 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -732,7 +732,6 @@ static const char *rs6000_invalid_within_doloop (const_rtx); static rtx rs6000_generate_compare (enum rtx_code); static void rs6000_emit_stack_tie (void); static void rs6000_frame_related (rtx, rtx, HOST_WIDE_INT, rtx, rtx); -static rtx spe_synthesize_frame_save (rtx); static bool spe_func_has_64bit_regs_p (void); static void emit_frame_save (rtx, rtx, enum machine_mode, unsigned int, int, HOST_WIDE_INT); @@ -3616,6 +3615,7 @@ rs6000_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, && GET_CODE (XEXP (x, 1)) == CONST_INT && (unsigned HOST_WIDE_INT) (INTVAL (XEXP (x, 1)) + 0x8000) >= 0x10000 && !(SPE_VECTOR_MODE (mode) + || ALTIVEC_VECTOR_MODE (mode) || (TARGET_E500_DOUBLE && (mode == DFmode || mode == TFmode || mode == DImode)))) { @@ -3633,11 +3633,12 @@ rs6000_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, && GET_MODE_NUNITS (mode) == 1 && ((TARGET_HARD_FLOAT && TARGET_FPRS) || TARGET_POWERPC64 - || (((mode != DImode && mode != DFmode && mode != DDmode) - || TARGET_E500_DOUBLE) - && mode != TFmode && mode != TDmode)) + || ((mode != DImode && mode != DFmode && mode != DDmode) + || TARGET_E500_DOUBLE)) && (TARGET_POWERPC64 || mode != DImode) - && mode != TImode) + && mode != TImode + && mode != TFmode + && mode != TDmode) { return gen_rtx_PLUS (Pmode, XEXP (x, 0), force_reg (Pmode, force_operand (XEXP (x, 1), 0))); @@ -7089,9 +7090,9 @@ static struct builtin_description bdesc_2arg[] = { MASK_ALTIVEC, CODE_FOR_altivec_vrlb, "__builtin_altivec_vrlb", ALTIVEC_BUILTIN_VRLB }, { MASK_ALTIVEC, CODE_FOR_altivec_vrlh, "__builtin_altivec_vrlh", ALTIVEC_BUILTIN_VRLH }, { MASK_ALTIVEC, CODE_FOR_altivec_vrlw, "__builtin_altivec_vrlw", ALTIVEC_BUILTIN_VRLW }, - { MASK_ALTIVEC, CODE_FOR_altivec_vslb, "__builtin_altivec_vslb", ALTIVEC_BUILTIN_VSLB }, - { MASK_ALTIVEC, CODE_FOR_altivec_vslh, "__builtin_altivec_vslh", ALTIVEC_BUILTIN_VSLH }, - { MASK_ALTIVEC, CODE_FOR_altivec_vslw, "__builtin_altivec_vslw", ALTIVEC_BUILTIN_VSLW }, + { MASK_ALTIVEC, CODE_FOR_ashlv16qi3, "__builtin_altivec_vslb", ALTIVEC_BUILTIN_VSLB }, + { MASK_ALTIVEC, CODE_FOR_ashlv8hi3, "__builtin_altivec_vslh", ALTIVEC_BUILTIN_VSLH }, + { MASK_ALTIVEC, CODE_FOR_ashlv4si3, "__builtin_altivec_vslw", ALTIVEC_BUILTIN_VSLW }, { MASK_ALTIVEC, CODE_FOR_altivec_vsl, "__builtin_altivec_vsl", ALTIVEC_BUILTIN_VSL }, { MASK_ALTIVEC, CODE_FOR_altivec_vslo, "__builtin_altivec_vslo", ALTIVEC_BUILTIN_VSLO }, { MASK_ALTIVEC, CODE_FOR_altivec_vspltb, "__builtin_altivec_vspltb", ALTIVEC_BUILTIN_VSPLTB }, @@ -13902,6 +13903,9 @@ rs6000_expand_compare_and_swapqhi (rtx dst, rtx mem, rtx oldval, rtx newval) emit_insn (gen_sync_compare_and_swapqhi_internal (wdst, mask, oldval, newval, mem)); + /* Shift the result back. 
*/ + emit_insn (gen_lshrsi3 (wdst, wdst, shift)); + emit_move_insn (dst, gen_lowpart (mode, wdst)); } @@ -15386,77 +15390,12 @@ rs6000_frame_related (rtx insn, rtx reg, HOST_WIDE_INT val, } } - if (TARGET_SPE) - real = spe_synthesize_frame_save (real); - RTX_FRAME_RELATED_P (insn) = 1; REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, real, REG_NOTES (insn)); } -/* Given an SPE frame note, return a PARALLEL of SETs with the - original note, plus a synthetic register save. */ - -static rtx -spe_synthesize_frame_save (rtx real) -{ - rtx synth, offset, reg, real2; - - if (GET_CODE (real) != SET - || GET_MODE (SET_SRC (real)) != V2SImode) - return real; - - /* For the SPE, registers saved in 64-bits, get a PARALLEL for their - frame related note. The parallel contains a set of the register - being saved, and another set to a synthetic register (n+1200). - This is so we can differentiate between 64-bit and 32-bit saves. - Words cannot describe this nastiness. */ - - gcc_assert (GET_CODE (SET_DEST (real)) == MEM - && GET_CODE (XEXP (SET_DEST (real), 0)) == PLUS - && GET_CODE (SET_SRC (real)) == REG); - - /* Transform: - (set (mem (plus (reg x) (const y))) - (reg z)) - into: - (set (mem (plus (reg x) (const y+4))) - (reg z+1200)) - */ - - real2 = copy_rtx (real); - PUT_MODE (SET_DEST (real2), SImode); - reg = SET_SRC (real2); - real2 = replace_rtx (real2, reg, gen_rtx_REG (SImode, REGNO (reg))); - synth = copy_rtx (real2); - - if (BYTES_BIG_ENDIAN) - { - offset = XEXP (XEXP (SET_DEST (real2), 0), 1); - real2 = replace_rtx (real2, offset, GEN_INT (INTVAL (offset) + 4)); - } - - reg = SET_SRC (synth); - - synth = replace_rtx (synth, reg, - gen_rtx_REG (SImode, REGNO (reg) + 1200)); - - offset = XEXP (XEXP (SET_DEST (synth), 0), 1); - synth = replace_rtx (synth, offset, - GEN_INT (INTVAL (offset) - + (BYTES_BIG_ENDIAN ? 0 : 4))); - - RTX_FRAME_RELATED_P (synth) = 1; - RTX_FRAME_RELATED_P (real2) = 1; - if (BYTES_BIG_ENDIAN) - real = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, synth, real2)); - else - real = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, real2, synth)); - - return real; -} - /* Returns an insn that has a vrsave set operation with the appropriate CLOBBERs. */
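For the rs6000_expand_compare_and_swapqhi hunk above: the word-sized compare-and-swap leaves the loaded word in wdst, so the added gen_lshrsi3 call shifts the watched subword back down to bit 0 before gen_lowpart extracts it. A rough C model of the whole mask-and-shift scheme, assuming GCC's __sync_val_compare_and_swap builtin; cas_u16 and its control flow are illustrative, not code the compiler emits:

#include <stdint.h>

/* A 16-bit CAS built from a 32-bit CAS on the containing word, mirroring
   the mask/shift sequence around sync_compare_and_swapqhi_internal.  */
static uint16_t
cas_u16 (uint16_t *p, uint16_t oldval, uint16_t newval)
{
  uintptr_t addr = (uintptr_t) p;
  uint32_t *wp = (uint32_t *) (addr & ~(uintptr_t) 3); /* containing word */
  union { uint32_t w; uint8_t b[4]; } probe = { 0x01020304 };
  unsigned shift = (unsigned) (probe.b[0] == 1          /* big-endian?  */
                               ? 2 - (addr & 2) : (addr & 2)) * 8;
  uint32_t mask = (uint32_t) 0xffff << shift;

  for (;;)
    {
      uint32_t word = *wp;
      uint32_t expect = (word & ~mask) | ((uint32_t) oldval << shift);
      uint32_t desire = (word & ~mask) | ((uint32_t) newval << shift);
      uint32_t got = __sync_val_compare_and_swap (wp, expect, desire);
      if (got == expect)
        return oldval;                       /* swap succeeded */
      if ((got & mask) != ((uint32_t) oldval << shift))
        /* The halfword itself differed: shift the result back, as the
           new gen_lshrsi3 call does, and hand it to the caller.  */
        return (uint16_t) ((got & mask) >> shift);
      /* Only the surrounding bytes changed; retry with the fresh word.  */
    }
}

int
main (void)
{
  union { uint32_t w; uint16_t h[2]; } x = { 0 };
  x.h[0] = 10;
  x.h[1] = 20;
  uint16_t prev = cas_u16 (&x.h[1], 20, 30);
  return !(prev == 20 && x.h[1] == 30);
}

The retry loop covers the window where unrelated bytes of the containing word change while the watched halfword still holds oldval; the real insn sequence gets the same effect from the lwarx/stwcx. reservation.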