diff options
author | bernds <bernds@138bc75d-0d04-0410-961f-82ee72b054a4> | 2011-07-07 15:42:41 +0000 |
---|---|---|
committer | bernds <bernds@138bc75d-0d04-0410-961f-82ee72b054a4> | 2011-07-07 15:42:41 +0000 |
commit | 1b727a0a1626f41ef9461a472c266d6a347ee52b (patch) | |
tree | f9552f79c86d74255de5a691ef17a3767e4337fb /gcc/hw-doloop.c | |
parent | 596b2ba34652c9c0fb301522d9a0014a046ec277 (diff) | |
download | gcc-1b727a0a1626f41ef9461a472c266d6a347ee52b.tar.gz |
* hw-doloop.c: New file.
* hw-doloop.h: New file.
* Makefile.in (OBJS): Add hw-doloop.o.
(hw-doloop.o): New rule.
($(obj_out_file)): Add hw-doloop.h dependency.
* config/bfin/bfin.c: Include "hw-doloop.h".
(loop_info, DEF_VEC_P for loop_info, loop_info_d): Remove.
(bfin_dump_loops, bfin_bb_in_loop, bfin_scan_loop): Remove.
(hwloop_optimize): Renamed from bfin_optimize_loop. Argument
type changed to hwloop_info. Return bool, true if the loop was
successfully optimized. Remove code that was moved to
hw-doloop.c, and adjust other parts.
(hwloop_fail): New static function, containing parts that used
to be in bfin_optimize_loop.
(bfin_discover_loop, bfin_discover_loops, free_loops,
bfin_reorder_loops): Remove.
(hwloop_pattern_reg): New static function.
(bfin_doloop_hooks): New variable.
(bfin_reorg_loops): Remove most code, call reorg_loops.
* config/bfin/bfin.md (doloop_end splitter): Also enable if
loop counter is a memory_operand.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@175985 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc/hw-doloop.c')
-rw-r--r-- | gcc/hw-doloop.c | 672 |
1 files changed, 672 insertions, 0 deletions
diff --git a/gcc/hw-doloop.c b/gcc/hw-doloop.c new file mode 100644 index 00000000000..3d59200a84b --- /dev/null +++ b/gcc/hw-doloop.c @@ -0,0 +1,672 @@ +/* Code to analyze doloop loops in order for targets to perform late + optimizations converting doloops to other forms of hardware loops. + Copyright (C) 2011 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "rtl.h" +#include "flags.h" +#include "expr.h" +#include "hard-reg-set.h" +#include "regs.h" +#include "basic-block.h" +#include "tm_p.h" +#include "df.h" +#include "cfglayout.h" +#include "cfgloop.h" +#include "output.h" +#include "recog.h" +#include "target.h" +#include "hw-doloop.h" + +#ifdef HAVE_doloop_end + +/* Dump information collected in LOOPS. */ +static void +dump_hwloops (hwloop_info loops) +{ + hwloop_info loop; + + for (loop = loops; loop; loop = loop->next) + { + hwloop_info i; + basic_block b; + unsigned ix; + + fprintf (dump_file, ";; loop %d: ", loop->loop_no); + if (loop->bad) + fprintf (dump_file, "(bad) "); + fprintf (dump_file, "{head:%d, depth:%d, reg:%u}", + loop->head == NULL ? -1 : loop->head->index, + loop->depth, REGNO (loop->iter_reg)); + + fprintf (dump_file, " blocks: [ "); + for (ix = 0; VEC_iterate (basic_block, loop->blocks, ix, b); ix++) + fprintf (dump_file, "%d ", b->index); + fprintf (dump_file, "] "); + + fprintf (dump_file, " inner loops: [ "); + for (ix = 0; VEC_iterate (hwloop_info, loop->loops, ix, i); ix++) + fprintf (dump_file, "%d ", i->loop_no); + fprintf (dump_file, "]\n"); + } + fprintf (dump_file, "\n"); +} + +/* Return true if BB is part of LOOP. */ +static bool +bb_in_loop_p (hwloop_info loop, basic_block bb) +{ + return bitmap_bit_p (loop->block_bitmap, bb->index); +} + +/* Scan the blocks of LOOP (and its inferiors), and record things such as + hard registers set, jumps out of and within the loop. */ +static void +scan_loop (hwloop_info loop) +{ + unsigned ix; + basic_block bb; + + if (loop->bad) + return; + + if (REGNO_REG_SET_P (df_get_live_in (loop->successor), + REGNO (loop->iter_reg))) + loop->iter_reg_used_outside = true; + + for (ix = 0; VEC_iterate (basic_block, loop->blocks, ix, bb); ix++) + { + rtx insn; + edge e; + edge_iterator ei; + + if (bb != loop->tail) + FOR_EACH_EDGE (e, ei, bb->succs) + { + if (bb_in_loop_p (loop, e->dest)) + { + if (!(e->flags & EDGE_FALLTHRU)) + loop->jumps_within = true; + } + else + { + loop->jumps_outof = true; + if (!loop->bad) + gcc_assert (!REGNO_REG_SET_P (df_get_live_in (e->dest), + REGNO (loop->iter_reg))); + } + } + + for (insn = BB_HEAD (bb); + insn != NEXT_INSN (BB_END (bb)); + insn = NEXT_INSN (insn)) + { + df_ref *def_rec; + HARD_REG_SET set_this_insn; + + if (!INSN_P (insn)) + continue; + + if (recog_memoized (insn) < 0 + && (GET_CODE (PATTERN (insn)) == ASM_INPUT + || asm_noperands (PATTERN (insn)) >= 0)) + loop->has_asm = true; + + CLEAR_HARD_REG_SET (set_this_insn); + for (def_rec = DF_INSN_DEFS (insn); *def_rec; def_rec++) + { + rtx dreg = DF_REF_REG (*def_rec); + + if (!REG_P (dreg)) + continue; + + add_to_hard_reg_set (&set_this_insn, GET_MODE (dreg), + REGNO (dreg)); + } + + if (insn == loop->loop_end) + CLEAR_HARD_REG_BIT (set_this_insn, REGNO (loop->iter_reg)); + else if (reg_mentioned_p (loop->iter_reg, PATTERN (insn))) + loop->iter_reg_used = true; + IOR_HARD_REG_SET (loop->regs_set_in_loop, set_this_insn); + } + } +} + +/* Compute the incoming_dest and incoming_src members of LOOP by + identifying the edges that jump into the loop. If there is more + than one block that jumps into the loop, incoming_src will be set + to NULL; likewise, if there is more than one block in the loop that + is the destination of an incoming edge, incoming_dest will be NULL. + + Return true if either of these two fields is nonnull, false + otherwise. */ +static bool +process_incoming_edges (hwloop_info loop) +{ + edge e; + edge_iterator ei; + bool first = true; + + FOR_EACH_EDGE (e, ei, loop->incoming) + { + if (first) + { + loop->incoming_src = e->src; + loop->incoming_dest = e->dest; + first = false; + } + else + { + if (e->dest != loop->incoming_dest) + loop->incoming_dest = NULL; + if (e->src != loop->incoming_src) + loop->incoming_src = NULL; + } + } + if (loop->incoming_src == NULL && loop->incoming_dest == NULL) + return false; + + return true; +} + +/* Try to identify a forwarder block that jump into LOOP, and add it to + the set of blocks in the loop, updating the vector of incoming blocks as + well. This transformation gives a second chance to loops we couldn't + otherwise handle by increasing the chance that we'll end up with one + incoming_src block. + Return true if we made a change, false otherwise. */ +static bool +add_forwarder_blocks (hwloop_info loop) +{ + edge e; + edge_iterator ei; + + FOR_EACH_EDGE (e, ei, loop->incoming) + { + if (forwarder_block_p (e->src)) + { + edge e2; + edge_iterator ei2; + + if (dump_file) + fprintf (dump_file, + ";; Adding forwarder block %d to loop %d and retrying\n", + e->src->index, loop->loop_no); + VEC_safe_push (basic_block, heap, loop->blocks, e->src); + bitmap_set_bit (loop->block_bitmap, e->src->index); + FOR_EACH_EDGE (e2, ei2, e->src->preds) + VEC_safe_push (edge, gc, loop->incoming, e2); + VEC_unordered_remove (edge, loop->incoming, ei.index); + return true; + } + } + return false; +} + +/* Called from reorg_loops when a potential loop end is found. LOOP is + a newly set up structure describing the loop, it is this function's + responsibility to fill most of it. TAIL_BB and TAIL_INSN point to the + loop_end insn and its enclosing basic block. REG is the loop counter + register. + For our purposes, a loop is defined by the set of blocks reachable from + the loop head in which the loop counter register is live. This matches + the expected use; targets that call into this code usually replace the + loop counter with a different special register. */ +static void +discover_loop (hwloop_info loop, basic_block tail_bb, rtx tail_insn, rtx reg) +{ + bool found_tail; + unsigned dwork = 0; + basic_block bb; + VEC (basic_block,heap) *works; + + loop->tail = tail_bb; + loop->loop_end = tail_insn; + loop->iter_reg = reg; + loop->incoming = VEC_alloc (edge, gc, 2); + loop->start_label = JUMP_LABEL (tail_insn); + + if (EDGE_COUNT (tail_bb->succs) != 2) + { + loop->bad = true; + return; + } + loop->head = BRANCH_EDGE (tail_bb)->dest; + loop->successor = FALLTHRU_EDGE (tail_bb)->dest; + + works = VEC_alloc (basic_block, heap, 20); + VEC_safe_push (basic_block, heap, works, loop->head); + + found_tail = false; + for (dwork = 0; VEC_iterate (basic_block, works, dwork, bb); dwork++) + { + edge e; + edge_iterator ei; + if (bb == EXIT_BLOCK_PTR) + { + /* We've reached the exit block. The loop must be bad. */ + if (dump_file) + fprintf (dump_file, + ";; Loop is bad - reached exit block while scanning\n"); + loop->bad = true; + break; + } + + if (bitmap_bit_p (loop->block_bitmap, bb->index)) + continue; + + /* We've not seen this block before. Add it to the loop's + list and then add each successor to the work list. */ + + VEC_safe_push (basic_block, heap, loop->blocks, bb); + bitmap_set_bit (loop->block_bitmap, bb->index); + + if (bb == tail_bb) + found_tail = true; + else + { + FOR_EACH_EDGE (e, ei, bb->succs) + { + basic_block succ = EDGE_SUCC (bb, ei.index)->dest; + if (REGNO_REG_SET_P (df_get_live_in (succ), + REGNO (loop->iter_reg))) + VEC_safe_push (basic_block, heap, works, succ); + } + } + } + + if (!found_tail) + loop->bad = true; + + /* Find the predecessor, and make sure nothing else jumps into this loop. */ + if (!loop->bad) + { + FOR_EACH_VEC_ELT (basic_block, loop->blocks, dwork, bb) + { + edge e; + edge_iterator ei; + FOR_EACH_EDGE (e, ei, bb->preds) + { + basic_block pred = e->src; + + if (!bb_in_loop_p (loop, pred)) + { + if (dump_file) + fprintf (dump_file, ";; Loop %d: incoming edge %d -> %d\n", + loop->loop_no, pred->index, + e->dest->index); + VEC_safe_push (edge, gc, loop->incoming, e); + } + } + } + + if (!process_incoming_edges (loop)) + { + if (dump_file) + fprintf (dump_file, + ";; retrying loop %d with forwarder blocks\n", + loop->loop_no); + if (!add_forwarder_blocks (loop)) + { + if (dump_file) + fprintf (dump_file, ";; No forwarder blocks found\n"); + loop->bad = true; + } + else if (!process_incoming_edges (loop)) + { + if (dump_file) + fprintf (dump_file, + ";; can't find suitable entry for loop %d\n", + loop->loop_no); + } + } + } + + VEC_free (basic_block, heap, works); +} + +/* Analyze the structure of the loops in the current function. Use + STACK for bitmap allocations. Returns all the valid candidates for + hardware loops found in this function. HOOKS is the argument + passed to reorg_loops, used here to find the iteration registers + from a loop_end pattern. */ +static hwloop_info +discover_loops (bitmap_obstack *stack, struct hw_doloop_hooks *hooks) +{ + hwloop_info loops = NULL; + hwloop_info loop; + basic_block bb; + int nloops = 0; + + /* Find all the possible loop tails. This means searching for every + loop_end instruction. For each one found, create a hwloop_info + structure and add the head block to the work list. */ + FOR_EACH_BB (bb) + { + rtx tail = BB_END (bb); + rtx insn, reg; + + while (tail && GET_CODE (tail) == NOTE && tail != BB_HEAD (bb)) + tail = PREV_INSN (tail); + + if (tail == NULL_RTX) + continue; + + if (!JUMP_P (tail)) + continue; + reg = hooks->end_pattern_reg (tail); + if (reg == NULL_RTX) + continue; + + /* A possible loop end */ + + /* There's a degenerate case we can handle - an empty loop consisting + of only a back branch. Handle that by deleting the branch. */ + insn = JUMP_LABEL (tail); + while (insn && !NONDEBUG_INSN_P (insn)) + insn = NEXT_INSN (insn); + if (insn == tail) + { + basic_block succ = FALLTHRU_EDGE (bb)->dest; + if (dump_file) + { + fprintf (dump_file, ";; degenerate loop ending at\n"); + print_rtl_single (dump_file, tail); + } + if (!REGNO_REG_SET_P (df_get_live_in (succ), REGNO (reg))) + { + if (dump_file) + fprintf (dump_file, ";; deleting it\n"); + delete_insn_and_edges (tail); + continue; + } + } + + loop = XCNEW (struct hwloop_info_d); + loop->next = loops; + loops = loop; + loop->loop_no = nloops++; + loop->blocks = VEC_alloc (basic_block, heap, 20); + loop->block_bitmap = BITMAP_ALLOC (stack); + + if (dump_file) + { + fprintf (dump_file, ";; potential loop %d ending at\n", + loop->loop_no); + print_rtl_single (dump_file, tail); + } + + discover_loop (loop, bb, tail, reg); + } + + /* Compute loop nestings. Given two loops A and B, either the sets + of their blocks don't intersect at all, or one is the subset of + the other, or the blocks don't form a good nesting structure. */ + for (loop = loops; loop; loop = loop->next) + { + hwloop_info other; + + if (loop->bad) + continue; + + for (other = loops; other; other = other->next) + { + if (other->bad) + continue; + + if (!bitmap_intersect_p (other->block_bitmap, loop->block_bitmap)) + continue; + if (!bitmap_intersect_compl_p (other->block_bitmap, + loop->block_bitmap)) + VEC_safe_push (hwloop_info, heap, loop->loops, other); + else if (!bitmap_intersect_compl_p (loop->block_bitmap, + other->block_bitmap)) + VEC_safe_push (hwloop_info, heap, other->loops, loop); + else + { + if (dump_file) + fprintf (dump_file, + ";; can't find suitable nesting for loops %d and %d\n", + loop->loop_no, other->loop_no); + loop->bad = other->bad = true; + } + } + } + + if (dump_file) + dump_hwloops (loops); + + return loops; +} + +/* Free up the loop structures in LOOPS. */ +static void +free_loops (hwloop_info loops) +{ + while (loops) + { + hwloop_info loop = loops; + loops = loop->next; + VEC_free (hwloop_info, heap, loop->loops); + VEC_free (basic_block, heap, loop->blocks); + BITMAP_FREE (loop->block_bitmap); + XDELETE (loop); + } +} + +#define BB_AUX_INDEX(BB) ((intptr_t) (BB)->aux) + +/* Initialize the aux fields to give ascending indices to basic blocks. */ +static void +set_bb_indices (void) +{ + basic_block bb; + intptr_t index; + + index = 0; + FOR_EACH_BB (bb) + bb->aux = (void *) index++; +} + +/* The taken-branch edge from the loop end can actually go forward. + If the target's hardware loop support requires that the loop end be + after the loop start, try to reorder a loop's basic blocks when we + find such a case. + This is not very aggressive; it only moves at most one block. It + does not introduce new branches into loops; it may remove them, or + it may switch fallthru/jump edges. */ +static void +reorder_loops (hwloop_info loops) +{ + basic_block bb; + hwloop_info loop; + + cfg_layout_initialize (0); + + set_bb_indices (); + + for (loop = loops; loop; loop = loop->next) + { + edge e; + edge_iterator ei; + + if (loop->bad) + continue; + + if (BB_AUX_INDEX (loop->head) <= BB_AUX_INDEX (loop->tail)) + continue; + + FOR_EACH_EDGE (e, ei, loop->head->succs) + { + if (bitmap_bit_p (loop->block_bitmap, e->dest->index) + && BB_AUX_INDEX (e->dest) < BB_AUX_INDEX (loop->tail)) + { + basic_block start_bb = e->dest; + basic_block start_prev_bb = start_bb->prev_bb; + + if (dump_file) + fprintf (dump_file, ";; Moving block %d before block %d\n", + loop->head->index, start_bb->index); + loop->head->prev_bb->next_bb = loop->head->next_bb; + loop->head->next_bb->prev_bb = loop->head->prev_bb; + + loop->head->prev_bb = start_prev_bb; + loop->head->next_bb = start_bb; + start_prev_bb->next_bb = start_bb->prev_bb = loop->head; + + set_bb_indices (); + break; + } + } + loops = loops->next; + } + + FOR_EACH_BB (bb) + { + if (bb->next_bb != EXIT_BLOCK_PTR) + bb->aux = bb->next_bb; + else + bb->aux = NULL; + } + cfg_layout_finalize (); + clear_aux_for_blocks (); + df_analyze (); +} + +/* Call the OPT function for LOOP and all of its sub-loops. This is + done in a depth-first search; innermost loops are visited first. + OPTIMIZE and FAIL are the functions passed to reorg_loops by the + target's reorg pass. */ +static void +optimize_loop (hwloop_info loop, struct hw_doloop_hooks *hooks) +{ + int ix; + hwloop_info inner; + int inner_depth = 0; + + if (loop->visited) + return; + + loop->visited = 1; + + if (loop->bad) + { + if (dump_file) + fprintf (dump_file, ";; loop %d bad when found\n", loop->loop_no); + goto bad_loop; + } + + /* Every loop contains in its list of inner loops every loop nested inside + it, even if there are intermediate loops. This works because we're doing + a depth-first search here and never visit a loop more than once. + Recursion depth is effectively limited by the number of available + hardware registers. */ + for (ix = 0; VEC_iterate (hwloop_info, loop->loops, ix, inner); ix++) + { + optimize_loop (inner, hooks); + + if (!inner->bad && inner_depth < inner->depth) + inner_depth = inner->depth; + /* The set of registers may be changed while optimizing the inner + loop. */ + IOR_HARD_REG_SET (loop->regs_set_in_loop, inner->regs_set_in_loop); + } + + loop->depth = inner_depth + 1; + + if (hooks->opt (loop)) + return; + + bad_loop: + if (dump_file) + fprintf (dump_file, ";; loop %d is bad\n", loop->loop_no); + + loop->bad = true; + hooks->fail (loop); +} + +/* This function can be used from a port's machine_dependent_reorg to + find and analyze loops that end in loop_end instructions. It uses + a set of function pointers in HOOKS to call back into the + target-specific functions to perform the actual machine-specific + transformations. + + Such transformations typically involve additional set-up + instructions before the loop, to define loop bounds or set up a + special loop counter register. + + DO_REORDER should be set to true if we should try to use the + reorder_loops function to ensure the loop end occurs after the loop + start. This is for use by targets where the loop hardware requires + this condition. + + HOOKS is used to pass in target specific hooks; see + hw-doloop.h. */ +void +reorg_loops (bool do_reorder, struct hw_doloop_hooks *hooks) +{ + hwloop_info loops = NULL; + hwloop_info loop; + bitmap_obstack stack; + + df_live_add_problem (); + df_live_set_all_dirty (); + df_analyze (); + + bitmap_obstack_initialize (&stack); + + if (dump_file) + fprintf (dump_file, ";; Find loops, first pass\n\n"); + + loops = discover_loops (&stack, hooks); + + if (do_reorder) + { + reorder_loops (loops); + free_loops (loops); + + if (dump_file) + fprintf (dump_file, ";; Find loops, second pass\n\n"); + + loops = discover_loops (&stack, hooks); + } + + for (loop = loops; loop; loop = loop->next) + scan_loop (loop); + + /* Now apply the optimizations. */ + for (loop = loops; loop; loop = loop->next) + optimize_loop (loop, hooks); + + if (dump_file) + { + fprintf (dump_file, ";; After hardware loops optimization:\n\n"); + dump_hwloops (loops); + } + + free_loops (loops); + + if (dump_file) + print_rtl (dump_file, get_insns ()); +} +#endif |