diff options
author | davem <davem@138bc75d-0d04-0410-961f-82ee72b054a4> | 2011-09-27 02:10:10 +0000 |
---|---|---|
committer | davem <davem@138bc75d-0d04-0410-961f-82ee72b054a4> | 2011-09-27 02:10:10 +0000 |
commit | faa087bb581c160bb7b2e42a60e218b898990eb6 (patch) | |
tree | d8e42dd1926af159fe00a417353501088bd00c7c /gcc/config/sparc/sparc.c | |
parent | 180b6c8627c425036c1f79fd380b50baedf862bc (diff) | |
download | gcc-faa087bb581c160bb7b2e42a60e218b898990eb6.tar.gz |
Improve code generation for edge and pixel-compare, specifically avoid
sign and zero extensions on 64-bit and allow such instructions to be
placed in delay slots.
gcc/
* config/sparc/sparc.md (edge{8,16,32}{,l}): Return Pmode.
(fcmp{le,ne,gt,eq}{16,32}): Likewise.
* config/sparc/visintrin.h: Update edge and pixel-compare
intrinsics to return 'long' instead of 'int'.
* doc/extend.texi: Update documentation to match.
* config/sparc/sparc.c (eligible_for_return_delay): When leaf or
flat, allow any instruction. Otherwise, when V9 allow parallels
which consist only of sets to registers outside of %o0 to %o5.
(sparc_vis_init_builtins): Update VIS builtin types for edge
and pixel-compare.
gcc/testsuite/
* gcc.target/sparc/edge.c: Update for new return types.
* gcc.target/sparc/fcmp.c: Likewise.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@179227 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc/config/sparc/sparc.c')
-rw-r--r-- | gcc/config/sparc/sparc.c | 111 |
1 files changed, 85 insertions, 26 deletions
diff --git a/gcc/config/sparc/sparc.c b/gcc/config/sparc/sparc.c index a395321dd3e..d1d83556495 100644 --- a/gcc/config/sparc/sparc.c +++ b/gcc/config/sparc/sparc.c @@ -2857,9 +2857,10 @@ eligible_for_restore_insn (rtx trial, bool return_p) int eligible_for_return_delay (rtx trial) { + int regno; rtx pat; - if (GET_CODE (trial) != INSN || GET_CODE (PATTERN (trial)) != SET) + if (GET_CODE (trial) != INSN) return 0; if (get_attr_length (trial) != 1) @@ -2876,17 +2877,45 @@ eligible_for_return_delay (rtx trial) get_attr_in_uncond_branch_delay (trial) == IN_UNCOND_BRANCH_DELAY_TRUE; pat = PATTERN (trial); + if (GET_CODE (pat) == PARALLEL) + { + int i; + + if (! TARGET_V9) + return 0; + for (i = XVECLEN (pat, 0) - 1; i >= 0; i--) + { + rtx expr = XVECEXP (pat, 0, i); + if (GET_CODE (expr) != SET) + return 0; + if (GET_CODE (SET_DEST (expr)) != REG) + return 0; + regno = REGNO (SET_DEST (expr)); + if (regno >= 8 && regno < 24) + return 0; + } + return !epilogue_renumber (&pat, 1) + && (get_attr_in_uncond_branch_delay (trial) + == IN_UNCOND_BRANCH_DELAY_TRUE); + } + + if (GET_CODE (pat) != SET) + return 0; + + if (GET_CODE (SET_DEST (pat)) != REG) + return 0; + + regno = REGNO (SET_DEST (pat)); /* Otherwise, only operations which can be done in tandem with a `restore' or `return' insn can go into the delay slot. */ - if (GET_CODE (SET_DEST (pat)) != REG - || (REGNO (SET_DEST (pat)) >= 8 && REGNO (SET_DEST (pat)) < 24)) + if (regno >= 8 && regno < 24) return 0; /* If this instruction sets up floating point register and we have a return instruction, it can probably go in. But restore will not work with FP_REGS. */ - if (REGNO (SET_DEST (pat)) >= 32) + if (regno >= 32) return (TARGET_V9 && !epilogue_renumber (&pat, 1) && get_attr_in_uncond_branch_delay (trial) @@ -9172,10 +9201,17 @@ sparc_vis_init_builtins (void) tree si_ftype_ptr_ptr = build_function_type_list (intSI_type_node, ptr_type_node, ptr_type_node, 0); + tree di_ftype_ptr_ptr = build_function_type_list (intDI_type_node, + ptr_type_node, + ptr_type_node, 0); tree si_ftype_v4hi_v4hi = build_function_type_list (intSI_type_node, v4hi, v4hi, 0); tree si_ftype_v2si_v2si = build_function_type_list (intSI_type_node, v2si, v2si, 0); + tree di_ftype_v4hi_v4hi = build_function_type_list (intDI_type_node, + v4hi, v4hi, 0); + tree di_ftype_v2si_v2si = build_function_type_list (intDI_type_node, + v2si, v2si, 0); tree void_ftype_di = build_function_type_list (void_type_node, intDI_type_node, 0); tree di_ftype_void = build_function_type_list (intDI_type_node, @@ -9247,17 +9283,17 @@ sparc_vis_init_builtins (void) if (TARGET_ARCH64) { def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8di_vis, - si_ftype_ptr_ptr); + di_ftype_ptr_ptr); def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8ldi_vis, - si_ftype_ptr_ptr); + di_ftype_ptr_ptr); def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16di_vis, - si_ftype_ptr_ptr); + di_ftype_ptr_ptr); def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16ldi_vis, - si_ftype_ptr_ptr); + di_ftype_ptr_ptr); def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32di_vis, - si_ftype_ptr_ptr); + di_ftype_ptr_ptr); def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32ldi_vis, - si_ftype_ptr_ptr); + di_ftype_ptr_ptr); } else { @@ -9275,22 +9311,45 @@ sparc_vis_init_builtins (void) si_ftype_ptr_ptr); } - def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16_vis, - si_ftype_v4hi_v4hi); - def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32_vis, - si_ftype_v2si_v2si); - def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16_vis, - si_ftype_v4hi_v4hi); - def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32_vis, - si_ftype_v2si_v2si); - def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16_vis, - si_ftype_v4hi_v4hi); - def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32_vis, - si_ftype_v2si_v2si); - def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16_vis, - si_ftype_v4hi_v4hi); - def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32_vis, - si_ftype_v2si_v2si); + /* Pixel compare. */ + if (TARGET_ARCH64) + { + def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16di_vis, + di_ftype_v4hi_v4hi); + def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32di_vis, + di_ftype_v2si_v2si); + def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16di_vis, + di_ftype_v4hi_v4hi); + def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32di_vis, + di_ftype_v2si_v2si); + def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16di_vis, + di_ftype_v4hi_v4hi); + def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32di_vis, + di_ftype_v2si_v2si); + def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16di_vis, + di_ftype_v4hi_v4hi); + def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32di_vis, + di_ftype_v2si_v2si); + } + else + { + def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16si_vis, + si_ftype_v4hi_v4hi); + def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32si_vis, + si_ftype_v2si_v2si); + def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16si_vis, + si_ftype_v4hi_v4hi); + def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32si_vis, + si_ftype_v2si_v2si); + def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16si_vis, + si_ftype_v4hi_v4hi); + def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32si_vis, + si_ftype_v2si_v2si); + def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16si_vis, + si_ftype_v4hi_v4hi); + def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32si_vis, + si_ftype_v2si_v2si); + } } /* Handle TARGET_EXPAND_BUILTIN target hook. |