summaryrefslogtreecommitdiff
path: root/gcc/config/sparc/sparc.c
diff options
context:
space:
mode:
authordavem <davem@138bc75d-0d04-0410-961f-82ee72b054a4>2011-09-27 02:10:10 +0000
committerdavem <davem@138bc75d-0d04-0410-961f-82ee72b054a4>2011-09-27 02:10:10 +0000
commitfaa087bb581c160bb7b2e42a60e218b898990eb6 (patch)
treed8e42dd1926af159fe00a417353501088bd00c7c /gcc/config/sparc/sparc.c
parent180b6c8627c425036c1f79fd380b50baedf862bc (diff)
downloadgcc-faa087bb581c160bb7b2e42a60e218b898990eb6.tar.gz
Improve code generation for edge and pixel-compare, specifically avoid
sign and zero extensions on 64-bit and allow such instructions to be placed in delay slots. gcc/ * config/sparc/sparc.md (edge{8,16,32}{,l}): Return Pmode. (fcmp{le,ne,gt,eq}{16,32}): Likewise. * config/sparc/visintrin.h: Update edge and pixel-compare intrinsics to return 'long' instead of 'int'. * doc/extend.texi: Update documentation to match. * config/sparc/sparc.c (eligible_for_return_delay): When leaf or flat, allow any instruction. Otherwise, when V9 allow parallels which consist only of sets to registers outside of %o0 to %o5. (sparc_vis_init_builtins): Update VIS builtin types for edge and pixel-compare. gcc/testsuite/ * gcc.target/sparc/edge.c: Update for new return types. * gcc.target/sparc/fcmp.c: Likewise. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@179227 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc/config/sparc/sparc.c')
-rw-r--r--gcc/config/sparc/sparc.c111
1 files changed, 85 insertions, 26 deletions
diff --git a/gcc/config/sparc/sparc.c b/gcc/config/sparc/sparc.c
index a395321dd3e..d1d83556495 100644
--- a/gcc/config/sparc/sparc.c
+++ b/gcc/config/sparc/sparc.c
@@ -2857,9 +2857,10 @@ eligible_for_restore_insn (rtx trial, bool return_p)
int
eligible_for_return_delay (rtx trial)
{
+ int regno;
rtx pat;
- if (GET_CODE (trial) != INSN || GET_CODE (PATTERN (trial)) != SET)
+ if (GET_CODE (trial) != INSN)
return 0;
if (get_attr_length (trial) != 1)
@@ -2876,17 +2877,45 @@ eligible_for_return_delay (rtx trial)
get_attr_in_uncond_branch_delay (trial) == IN_UNCOND_BRANCH_DELAY_TRUE;
pat = PATTERN (trial);
+ if (GET_CODE (pat) == PARALLEL)
+ {
+ int i;
+
+ if (! TARGET_V9)
+ return 0;
+ for (i = XVECLEN (pat, 0) - 1; i >= 0; i--)
+ {
+ rtx expr = XVECEXP (pat, 0, i);
+ if (GET_CODE (expr) != SET)
+ return 0;
+ if (GET_CODE (SET_DEST (expr)) != REG)
+ return 0;
+ regno = REGNO (SET_DEST (expr));
+ if (regno >= 8 && regno < 24)
+ return 0;
+ }
+ return !epilogue_renumber (&pat, 1)
+ && (get_attr_in_uncond_branch_delay (trial)
+ == IN_UNCOND_BRANCH_DELAY_TRUE);
+ }
+
+ if (GET_CODE (pat) != SET)
+ return 0;
+
+ if (GET_CODE (SET_DEST (pat)) != REG)
+ return 0;
+
+ regno = REGNO (SET_DEST (pat));
/* Otherwise, only operations which can be done in tandem with
a `restore' or `return' insn can go into the delay slot. */
- if (GET_CODE (SET_DEST (pat)) != REG
- || (REGNO (SET_DEST (pat)) >= 8 && REGNO (SET_DEST (pat)) < 24))
+ if (regno >= 8 && regno < 24)
return 0;
/* If this instruction sets up floating point register and we have a return
instruction, it can probably go in. But restore will not work
with FP_REGS. */
- if (REGNO (SET_DEST (pat)) >= 32)
+ if (regno >= 32)
return (TARGET_V9
&& !epilogue_renumber (&pat, 1)
&& get_attr_in_uncond_branch_delay (trial)
@@ -9172,10 +9201,17 @@ sparc_vis_init_builtins (void)
tree si_ftype_ptr_ptr = build_function_type_list (intSI_type_node,
ptr_type_node,
ptr_type_node, 0);
+ tree di_ftype_ptr_ptr = build_function_type_list (intDI_type_node,
+ ptr_type_node,
+ ptr_type_node, 0);
tree si_ftype_v4hi_v4hi = build_function_type_list (intSI_type_node,
v4hi, v4hi, 0);
tree si_ftype_v2si_v2si = build_function_type_list (intSI_type_node,
v2si, v2si, 0);
+ tree di_ftype_v4hi_v4hi = build_function_type_list (intDI_type_node,
+ v4hi, v4hi, 0);
+ tree di_ftype_v2si_v2si = build_function_type_list (intDI_type_node,
+ v2si, v2si, 0);
tree void_ftype_di = build_function_type_list (void_type_node,
intDI_type_node, 0);
tree di_ftype_void = build_function_type_list (intDI_type_node,
@@ -9247,17 +9283,17 @@ sparc_vis_init_builtins (void)
if (TARGET_ARCH64)
{
def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8di_vis,
- si_ftype_ptr_ptr);
+ di_ftype_ptr_ptr);
def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8ldi_vis,
- si_ftype_ptr_ptr);
+ di_ftype_ptr_ptr);
def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16di_vis,
- si_ftype_ptr_ptr);
+ di_ftype_ptr_ptr);
def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16ldi_vis,
- si_ftype_ptr_ptr);
+ di_ftype_ptr_ptr);
def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32di_vis,
- si_ftype_ptr_ptr);
+ di_ftype_ptr_ptr);
def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32ldi_vis,
- si_ftype_ptr_ptr);
+ di_ftype_ptr_ptr);
}
else
{
@@ -9275,22 +9311,45 @@ sparc_vis_init_builtins (void)
si_ftype_ptr_ptr);
}
- def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16_vis,
- si_ftype_v4hi_v4hi);
- def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32_vis,
- si_ftype_v2si_v2si);
- def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16_vis,
- si_ftype_v4hi_v4hi);
- def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32_vis,
- si_ftype_v2si_v2si);
- def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16_vis,
- si_ftype_v4hi_v4hi);
- def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32_vis,
- si_ftype_v2si_v2si);
- def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16_vis,
- si_ftype_v4hi_v4hi);
- def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32_vis,
- si_ftype_v2si_v2si);
+ /* Pixel compare. */
+ if (TARGET_ARCH64)
+ {
+ def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16di_vis,
+ di_ftype_v4hi_v4hi);
+ def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32di_vis,
+ di_ftype_v2si_v2si);
+ def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16di_vis,
+ di_ftype_v4hi_v4hi);
+ def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32di_vis,
+ di_ftype_v2si_v2si);
+ def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16di_vis,
+ di_ftype_v4hi_v4hi);
+ def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32di_vis,
+ di_ftype_v2si_v2si);
+ def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16di_vis,
+ di_ftype_v4hi_v4hi);
+ def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32di_vis,
+ di_ftype_v2si_v2si);
+ }
+ else
+ {
+ def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16si_vis,
+ si_ftype_v4hi_v4hi);
+ def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32si_vis,
+ si_ftype_v2si_v2si);
+ def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16si_vis,
+ si_ftype_v4hi_v4hi);
+ def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32si_vis,
+ si_ftype_v2si_v2si);
+ def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16si_vis,
+ si_ftype_v4hi_v4hi);
+ def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32si_vis,
+ si_ftype_v2si_v2si);
+ def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16si_vis,
+ si_ftype_v4hi_v4hi);
+ def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32si_vis,
+ si_ftype_v2si_v2si);
+ }
}
/* Handle TARGET_EXPAND_BUILTIN target hook.