Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/ChangeLog-ira-reload | 458
-rw-r--r-- | gcc/Makefile.in | 7
-rw-r--r-- | gcc/config/sparc/gas.h | 6
-rw-r--r-- | gcc/ira-build.c | 71
-rw-r--r-- | gcc/ira-color.c | 56
-rw-r--r-- | gcc/ira-conflicts.c | 13
-rw-r--r-- | gcc/ira-costs.c | 26
-rw-r--r-- | gcc/ira-int.h | 10
-rw-r--r-- | gcc/ira-reload.c | 1595
-rw-r--r-- | gcc/ira.c | 6
-rw-r--r-- | gcc/ira.h | 3
-rw-r--r-- | gcc/params.def | 8
-rw-r--r-- | gcc/regs.h | 1
-rw-r--r-- | gcc/regstat.c | 10
-rw-r--r-- | gcc/reload.h | 8
-rw-r--r-- | gcc/reload1.c | 351 |
16 files changed, 2543 insertions, 86 deletions
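For orientation, the heart of this patch is the pseudo "localization" done by the new gcc/ira-reload.c below: an unallocated pseudo that is live across basic blocks is rewritten, within each block, to a fresh block-local pseudo, with a load from the pseudo's equivalent memory location emitted before its first upward-exposed use and a store back to that location after its last set. The following standalone sketch only models that idea; the names (struct insn, localize_block, the printed RTL-like strings) are illustrative and are not the GCC data structures used in the patch.

/* Toy model of the localization performed by ira-reload.c.  A pseudo
   that received no hard register and is live across blocks is renamed,
   within one block, to a fresh block-local pseudo: a load from the
   pseudo's stack slot is emitted before its first upward-exposed use
   and a store back to the slot after its last set.  Everything below
   is illustrative; none of these types exist in GCC.  */
#include <stdio.h>
#include <stdbool.h>

struct insn { bool uses_p; bool sets_p; const char *text; };

static void
localize_block (struct insn *insns, int n, const char *slot)
{
  int i, last_set = -1;
  bool set_seen = false, load_emitted = false;

  for (i = 0; i < n; i++)
    if (insns[i].sets_p)
      last_set = i;

  for (i = 0; i < n; i++)
    {
      /* A use seen before any set in the block reads the value that
         flows in from other blocks, so reload it from the slot once.  */
      if (insns[i].uses_p && !set_seen && !load_emitted)
        {
          printf ("  (set local %s)\t; load before first use\n", slot);
          load_emitted = true;
        }

      printf ("  %s\n", insns[i].text);  /* references renamed to "local" */

      if (insns[i].sets_p)
        set_seen = true;

      /* Other blocks still read the original pseudo through its slot,
         so the value must be written back after the last set.  */
      if (i == last_set)
        printf ("  (set %s local)\t; store after last set\n", slot);
    }
}

int
main (void)
{
  struct insn bb[] = {
    { true,  false, "(set c (mult local d))" },
    { true,  true,  "(set local (plus local c))" },
    { true,  false, "(set e (minus local c))" },
  };

  localize_block (bb, 3, "[slot]");
  return 0;
}

Running the toy prints the block with a single load inserted before the upward-exposed use and a single store after the last set, which is the same shape of code emit_localizing_loads and emit_localizing_stores produce in the real patch.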
diff --git a/gcc/ChangeLog-ira-reload b/gcc/ChangeLog-ira-reload new file mode 100644 index 00000000000..4d9ee4dc5e2 --- /dev/null +++ b/gcc/ChangeLog-ira-reload @@ -0,0 +1,458 @@ +2010-07-21 Jeff Law <law@redhat.com> + + * ira-reload.c (emit_localizing_stores): Just delete stores + to readonly-memory. + +2010-04-14 Jeff Law <law@redhat.com> + + * ira-reload.c (create_new_allocno_for_spilling): Remove code to set + ALLOCNO_REG. Various mechanical changes to remove the concept of + cover classes. + (copy_allocno_for_spilling): Likewise. + (localize_pseudos): Don't use ira_class_translate, just use the + preferred class. + (ira_reload): Update call to ira_tune_allocno_cost. + (maybe_add_conflict): Revamp test for ignoring conflicts. + +2010-03-30 Jeff Law <law@redhat.com> + + * ira-reload.c (ira_reload): If necessary, call commit_edge_insertions + after fixup_abnormal_edges. + +2010-02-01 Jeff Law <law@redhat.com> + + * ira-reload.c (ira_reload): Wipe equivalence information and + REG_EQUIV notes which refer to PSEUDOS_TO_LOCALIZE. + +2010-12-23 Jeff Law <law@redhat.com> + + * reload1.c (record_equivalences_for_reload): Mirror Bernd's change + #160947 for init_eliminable_invariants. + +2010-12-21 Jeff Law <law@redhat.com> + + * ira-reload.c (build_conflicts_for_new_allocnos): If the new + allocno crosses calls, then mark CALL_USED_REG_SET as conflicting. + +2010-12-14 Jeff Law <law@redhat.com> + + * reload.h (reg_equiv_memory_loc_vec): Move into reg_equivs structure. + (reg_equiv_constant, reg_equiv_invariant): Likewise. + (reg_equiv_memory_loc, reg_equiv_address): Likewise. + (reg_equiv_mem, reg_equiv_alt_mem_list): Likewise. + (reg_equiv_init, reg_equiv_init_size): Likewise. + (struct reg_equivs): New structure. + (reg_equivs): VEC of struct reg_equiv. + * caller-save.c (mark_referenced_regs): Corresponding changes to + access the various reg_equiv_XXX arrays. + * ira.c (find_equiv_invariant_cost): Likewise. + (fix_reg_equiv_init, no_equiv): Likewise. + (update_equiv_regs): Likewise. + (ira): Likewise. + * ira-reload.c (localize_pseudo_p): Likewise. + (emit_localizing_loads, emit_localizing_stores): Likewise. + (localize_pseudos): Likewise. + * integrate.c (allocate_initial_values): Likewise. + * reload.c (push_reg_equiv_alt_mem): Likewise. + (push_secondary_reload, push_reload): Likewise. + (find_reloads, find_reload_toplev): Likewise. + (make_memloc, find_reloads_address): Likewise. + (subst_reg_equivs, subst_indexed_address): Likewise. + (find_reloads_address_1, find_reloads_subreg_address): Likewise. + (refers_to_mem_for_reload_1, find_reg_equiv): Likewise. + * reload1.c (replace_pseudos_in): Likewise. + (reload, record_equivalences_for_reload): Likewise. + (calculate_needs_all_insns): Likewise. + (calculate_elim_costs_all_insns, alter_reg): Likewise. + (note_reg_elim_costly, eliminate_regs_1): Likewise. + (elimination_effects, init_eliminable_invariants): Likewise. + (free_reg_equiv, emit_input_reload_insns): Likewise. + (emit_output_reload_insns, delete_output_reload): Likewise. + * caller-save.c (allocate_initial_values): Prototype moved here. + * ira.c (init_reg_equiv_memory_loc): Remove. + (ira): Recompute max_regno and call grow_reg_equivs. + * ira-reload.c (copy_allocno_for_spilling): New. + (create_allocno_for_spilling): Simplify. + * integrate.c: Include "reload.h" + * integrate.h (allocate_initial_values): Remove prototype. + * reload1.c: Include ggc.h. + (reg_equiv_constant, reg_equiv_invariant): Remove. + (reg_equiv_invariant, reg_equiv_memory_loc): Likewise. 
+ (reg_equiv_memory_loc_vec, reg_equiv_address): Likewise. + (reg_equiv_mem, reg_equiv_alt_mem_list): Likewise. + (reg_equiv_init, reg_equiv_init_size): Likewise. + (reg_equivs, grow_reg_equivs): New. + (reload): Call grow_reg_equivs. Remove extraneous df calls. + (calculate_elim_costs_all_insns): Do not free reg_equivs. + (reload): Remove extraneous df calls. + +2010-12-01 Jeff Law <law@redhat.com> + + * reload1.c (allocate_reload_reg): Remove set, but unused variable. + + * reload1.c: Include params.h. + (df_initialized): Declare. + (reload): Set df_initialized. + (mark_spill_regs): Rename last argument to index. Simplify + array addressing arithmetic. + (allocate_reload_reg): Avoid insn scan if there is only one + spill register. Lazily perform the DF rescan. Clamp the number + of insns scanned. Changes to faciliate array addressing simplification + in mark_spill_regs. Remove unnecessary loop to map from regno to + index into spill_reg_rtx. + * params.def (PARAM_MAX_RELOAD_FORWARD_SEARCH_INSNS): New param. + +2010-11-18 Jeff Law <law@redhat.com> + + * reload1.c (REG_OR_SUBREG_P): Define. + (reload): Update DF information just prior to emitting reloads. + (mark_spill_regs): New. + (allocate_reload_reg): Scan forward from the insn needing reloads + to determine desirability of each spill reg. + +2010-08-04 Jeff Law <law@redhat.com> + + * ira-reload.c (create_new_allocno_for_spilling): Set the new allocno's + cover class before creating its underlying objects. Set the underlying + object's conflicts to include ira_no_alloc_regs and regs not in the + cover class. + +2010-07-26 Jeff Law <law@redhat.com> + + * ira-reload.c (create_new_allocno_for_spilling): Changes to allow + an allocno to have > 1 underlying object. + (maybe_add_conflict): Likewise. + (ira_reload): Likewise. + +2010-07-21 Jeff Law <law@redhat.com> + + * ira-reload.c (create_new_allocno_for_spilling): Create the + underlying object as well. + +2010-07-07 Jeff Law <law@redhat.com> + + * ira-reload.c: Revert change from Jan 14. It is not safe due to + assumptions in reload. + + * ira-reload.c (create_new_allocno_for_spilling): Add some debug dump + support. + +2010-06-28 Jeff Law <law@redhat.com> + + * ira-reload.c (create_new_allocno_for_spilling): Drop 'allocno' + from live_range_t. + * ira-build.c (remove_allocno_conflicts): Use CLEAR_MINMAX_SET_BIT + instead of CLEAR_ALLOCNO_SET_BIT. + +2010-06-21 Jeff Law <law@redhat.com> + + * ira-reload.c (localize_pseudos): Use GGC_RESIZEVEC rather than + ggc_realloc_stat. + +2010-06-16 Jeff Law <law@redhat.com> + + * ira-reload.c (localize_pseudos): Use ggc_realloc_stat. + +2010-06-09 Jeff Law <law@redhat.com> + + * ira-build.c (initiate_cost_vectors): Initiate a cost vector for + NO_REGS. + * ira-reload.c (ira_bad_reload_regno, ira_bad_reload_regno_1): Delete. + * ira.h (ira_bad_reload_regno): Delete redundant prototype. + * ira-costs.c (ira_costs): Do not call calculate_elim_costs_all_insns + when computing costs for pseudos/allocnos created by ira-reload. + +2010-05-10 Jeff Law <law@redhat.com> + + * ira-reload.c (localize_pseudos): Abort if we discover newly created + pseudos w/o allocnos. + + * ira-reload.c (emit_localizing_loads): Fix typo. + +2010-04-27 Jeff Law <law@redhat.com> + + * ira-reload.c (ira_reload): Remove set, but not used 'tail' variable. + * reload1.c (reload): Similarly for 'n' variable. + +2010-02-16 Jeff Law <law@redhat.com> + + * ira-reload.c (create_new_allocno_for_spilling): No longer + copy HARD_REG_COSTS from the original allocno. 
+ (ira_reload): Call ira_tune_allocno_costs_and_cover_classes and + ira_costs. + * ira-costs.c (find_costs_and_classes): New argument MIN_REGNO. Only + update reg classes for pseudos larger than MIN_REGNO. + (setup_allocno_cover_class_and_costs): Similarly, but for cover + classes. + (ira_costs): New argument MIN_REGNO. Pass along to + find_costs_and_classes and setup_allocno_cover_class_and_costs. + (ira_set_pseudo_classes): Corresponding changes. + * ira-int.h (ira_costs): Update prototype. + * ira_build.c (ira_build): Pass in new argument to ira_costs. + + * ira-reload.c (create_new_allocno_for_spilling): Zero CALL_FREQ + CALLS_CROSSED_NUM and NEXT_REGNO_ALLOCNO fields. + (build_conflicts_for_new_allocnos): Update CALL_FREQ and + CALLS_CROSSED_NUM for new allocnos as needed. + +2010-02-11 Jeff Law <law@redhat.com> + + * ira-reload.c (ira_reload): Recompute DF info immediately after + localizing pseudos. + +2010-02-02 Jeff Law <law@redhat.com> + + * ira-conflicts.c (process_regs_for_copy): Skip conflicts test if + CONFLICTS is NULL. + (add_insn_allocno_copies): Similarly for REG_DEAD notes. + (ira_add_copies): Renamed from add_copies. + (ira_build_conflicts): Corresponding changes. Clear CONFLICTS after + freeing memory. + * ira-int.h (ira_add_copies): Prototype. + * ira-build.c (finish_loop_tree_node): New argument to control + deletion of the regno_allocno_map. + (finish_loop_tree_nodes): Pass in new argument to + finish_loop_tree_node. Do not delete regno_allocno_map here. + (remove_unnecessary_regions): Pass in new argument to + finish_loop_tree_node. + * ira-reload.c (fix_allocno_map): New function. + (build_conflicts_for_new_allocnos): Remove copy detection. + (ira_reload): Call fix_allocno_map and ira_add_copies. + +2010-01-26 Jeff Law <law@redhat.com> + + * ira-reload.c (emit_localizing_loads): Avoid creating unnecessary + pseudos. + + * ira-reload.c (build_conflicts_for_new_allocnos): Fix typo. + +2010-01-14 Jeff Law <law@redhat.com> + + * ira-reload.c (build_conflicts_for_new_allocnos): Avoid unnecessary + conflicts between allocnos appearing in a simple copy. + (mark_conflicts): Corresponding changes. + +2009-12-16 Jeff Law <law@redhat.com> + + * ira-reload.c (emit_localizing_stores): Use MEM directly in more + read-modify-write insns. + +2009-12-04 Jeff Law <law@redhat.com> + + * ira-reload.c (build_conflicts_for_new_allocnos): Look for + copies to/from new pseudos/allocnos and create ira copies for them. + + * ira-color.c (ira_reassign_conflict_pseudos): Remove bogus asserts. + Free UPDATED_HARD_REG_COSTS and UPDATED_CONFLICT_HARD_REG_COSTS. + (allocno_reload_assign): Likewise. + + +2009-12-02 Jeff Law <law@redhat.com> + + * ira-reload.c (create_new_allocno_for_spilling): If the original + allocno conflicts with all hard regs, then make the new allocno + conflict with all hard regs too. + + * ira-reload.c (build_conflicts_for_new_allocnos): Clear + ELIMINABLE_REGSET and IRA_NO_ALLOC_REGS from LIVE. + + * ira-reload.c (emit_localizing_stores): Allow replacement of + pseudo with MEM if there are 2 stores and no uses. + +2009-12-01 Jeff Law <law@redhat.com> + + * ira-reload.c (no_uses_after_this_set): New function. + (emit_localizing_stores): Use no_uses_after_this_set, accept + new argument TAIL. Remove references to no_uses_after_last_set. + (localize_pseudos): Remove references to no_uses_after_last_set. + (identify_singleton_sets): Likewise. + (no_uses_after_last_set): Delete bitmap. 
+ + * ira-reload.c (emit_localizing_stores): Decrement pseudo_nsets + if we eliminate a store to a pseudo we want to localize. Do not + clear regs_to_load if we eliminated a store to the pseudo reg. + +2009-11-18 Jeff Law <law@redhat.com> + + * ira-reload.c (build_conflicts_for_new_allocnos): Take HEAD and + TAIL arguments instead of BB. Iterate from tail back to head building + conflicts. + (localize_pseudos): Accept new VISITED argument. Operate on + extended basic blocks starting with BB. + (ira_reload): Refine to work with extended basic blocks. + +2009-11-10 Jeff Law <law@redhat.com> + + * ira-reload.c (localize_pseudos): Set REG_FREQ for the newly + created pseudos. + (create_new_allocno_for_spilling): Set ALLOCNO_FREQ based on the + new pseudo's REG_FREQ. + + 2009-11-10 Vladimir Makarov <vmakarov@redhat.com> + * ira-color.c (allocno_reload_assign): Avoid accumulating + reload registers in ALLOCNO_TOTAL_CONFLICT_HARD_REGS. + + * ira-reload.c (ira_bad_reload_regno_1): If the pseudo wants a + singleton register, then consider everything else bad. + +2009-11-05 Jeff Law <law@redhat.com> + + * ira-color.c (ira_reassign_pseudos): Collect all pseudos for + reassignment rather than doing them in two passes. + * reload1.c (finish_spills): Remove Oct 22, 2009 change as it's + no longer needed. + + * ira-reload.c (create_new_allocno_for_spilling): Copy the + UPDATED_MEMORY_COST and HARD_REG_COSTS from the old to the + new allocno. + +2009-11-04 Jeff Law <law@redhat.com> + + * ira-color.c (ira_reassign_pseudos): Only try to reassign + conflicting pseudos if requested by caller. + * ira.h (ira_reassign_pseudos): Update prototype. + * reload1.c (finish_spills): Update call to ira_reassign_pseudos. + * ira-reload.c (ira_reload): Likewise. + +2009-10-29 Jeff Law <law@redhat.com> + + * ira-build.c (update_conflict_hard_reg_costs): Do not + die if ALLOCNO_HARD_REG_COSTS is empty. + +2009-10-28 Jeff Law <law@redhat.com> + + 2009-10-22 Vladimir Makarov <vmakarov@redhat.com> + * ira-build.c (update_conflict_hard_reg_costs): New. + (ira_build): Call update_conflict_hard_reg_costs. + + * ira-reload.c (ira_reload): Clear REG_N_REFS and REG_N_SETS for + localized pseudos. + +2009-10-22 Jeff Law <law@redhat.com> + + * reload1.c (finish_spills): Also try to re-allocate pseudos + currently on the stack. + + * ira-lives.c (process_single_reg_class_operands): Update the + hard reg costs for all the hard registers desired by the single + reg class operand. + +2009-10-15 Jeff Law <law@redhat.com> + + * ira-reload.c (ira_bad_reload_regno): New function. + (ira_bad_reload_regno_1): Likewise. + * ira.h (ira_bad_reload_regno): Declare. + * reload1.c (allocate_reload_reg): Use ira_bad_reload_regno. + +2009-10-13 Jeff Law <law@redhat.com> + + * ira-reload.c (no_uses_after_last_set): New bitmap. + (identify_singleton_sets): Set it appropriately. + (emit_localizing_stores): More aggressively replace a pseudo + with its equivalent memory location. Indicate that INSN + should be deleted in certain cases. + (localize_pseudos): Allocate and free no_uses_after_last_set. + Fix SET/USE processing order for no_uses_after_last_set. + If emit_localizing_stores requests an insn be deleted, do it. + +2009-09-31 Jeff Law <law@redhat.com> + + * ira-reload.c (emit_localizing_loads): Attempt to replace the + pseudo with its equivalent memory when safe and profitable. + +2009-09-30 Jeff Law <law@redhat.com> + + * ira-reload.c (uses_before_set): Remove bitmap. + (identify_singleton_uses): Corresponding changes. 
+ (localize_pseudos): Likewise. + + * ira-reload.c (emit_localizing_stores): Return whether or not the + current insn needs to be rescanned by DF analyzer. + Attempt to replace the pseudo with its equivalent memory. + (localize_pseudos): Rescan as needed after emit_localizing_stores. + +2009-09-25 Jeff Law <law@redhat.com> + + * ira-reload.c (identify_singleton_uses): Twiddles for using DF rather + than note_stores/note_uses. + (identify_singleton_sets, collect_loads): Likewise. + (identify_singleton_uses_1): Remove. + (collect_loads_1, emit_localizing_loads_1): Likewise. + (emit_localizing_loads): Twiddles for using DF rather than + note_stores/note_uses. Return whether or not rescanning is needed. + (rename_sets, emit_localizing_stores): Likewise. + (localize_pseudos): Iterate over DF structures rather than using + note_stores/note_uses. Defer rescanning until we're finished with + the current insn. + +2009-09-24 Jeff Law <law@redhat.com> + + * ira-reload.c (emit_localizing_loads_1): Incrementally update DF. + (emit_localizing_stores): Likewise. + (create_new_allocno_for_spilling): Clear the hard register conflicts + for the new allocno. + (maybe_add_conflict): Handle hard register conflicts. + (mark_conflicts): Renamed from mark_conflict. Simplify slightly. + (mark_uses): New. + (live_uses, live_uses_1): Remove. + (build_conflicts_for_new_allocnos): Simplify and use DF. + +2009-09-22 Jeff Law <law@redhat.com> + + * ira-reload.c (build_conflicts_for_new_allocnos): Ignore debug + insns. + (localize_pseudos): Likewise. + +2009-09-21 Jeff Law <law@redhat.com> + + * ira-reload.c (localize_pseudos): Add new argument to call to + setup_reg_classes. + (emit_localizing_stores): Remove unused variable. + * reload1.c (reload): Pass in the right peudo register number + to alter_reg. Restore incorrectly removed initialization of + tmp_pseudo_reg_arr. + + * NOTES: New file. + + * ira-reload.c: New file. + * ira-int.h (reload_reload): Declare. + * ira-color.c (sorted_allocnos): No longer static. + * ira.c (ira): Call ira_reload. + * Makefile.in (OBJS-common): Add ira-reload.o + (ira-reload.o): Add dependencies + + * ira-color.c (setup_allocno_priorities): No longer static. + * ira-int.h (setup_allocno_priorities): Declare + + * reload1.c (reg_max_ref_width): No longer static. + (alter_reg): Likewise. + (record_equivalences_for_reload): New function extracted from reload. + (reload): Don't sort registers prior to calling alter_reg anymore. + * reload.h (reg_max_ref_width): Declare. + (alter_reg): Likewise. + (record_equivalences_for_reload): Likewise. + +2009-09-18 Jeff Law <law@redhat.com> + + * ira.c (expand_reg_info): No longer static. + * ira-int.h (expand_reg_info): Declare. + + * ira-build.c (remove_from_all_conflicts): New function. + (remove_allocno_conflicts): New function. + * ira-int.h (remove_from_all_conflicts): Declare. + + 2009-09-14 Vladimir Makarov <vmakarov@redhat.com> + * ira.c (ira_non_ordered_class_hard_regs): Define. + (setup_class_hard_regs): Initialize ira_non_ordered_class_hard_regs. + * ira-int.h (ira_non_ordered_class_hard_regs): Declare. + * ira-costs.c (ira_tune_allocno_costs_and_cover_classes): Increase + cost of unaligned hard regs when allocating multi-reg pseudos. + + * regstat.c (regstat_reallocate_ri): New function. + * regs.h (regstat_reallocate_ri): Declare + + * ira-conflicts.c (print_allocno_conflicts): New function to print + conflicts for a single allocno. + (print_conflicts): Use print_allocno_conflicts. 
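The 2010-12-14 entry above collapses the per-pseudo reg_equiv_* arrays into a single structure indexed by register number, later accessed in ira-reload.c as VEC_index (reg_equivs_t, reg_equivs, regno)->memory_loc and friends. A rough standalone sketch of that layout and of a grow_reg_equivs-style resize follows; the field set and the helper names here are assumptions for illustration, not the actual GCC definitions.

/* Hedged sketch of the reg_equivs reorganization: one record per pseudo,
   grown when new pseudos (e.g. the localized ones) are created.  The
   "value" stand-in and these helpers are illustrative only.  */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

typedef struct value value;     /* stand-in for GCC's rtx */

struct reg_equiv_entry
{
  value *constant;      /* equivalent constant, if any */
  value *invariant;     /* eliminable invariant, if any */
  value *memory_loc;    /* equivalent memory location (stack slot) */
  value *address;       /* equivalent address */
  value *mem;           /* MEM actually used once known */
};

static struct reg_equiv_entry *reg_equiv_table;
static size_t reg_equiv_len;

/* Make sure an entry exists for every pseudo below NEW_MAX, zeroing the
   newly added slots (analogous to growing after max_reg_num () rises).  */
static void
grow_reg_equiv_table (size_t new_max)
{
  struct reg_equiv_entry *p;

  if (new_max <= reg_equiv_len)
    return;

  p = realloc (reg_equiv_table, new_max * sizeof *p);
  if (p == NULL)
    abort ();

  memset (p + reg_equiv_len, 0, (new_max - reg_equiv_len) * sizeof *p);
  reg_equiv_table = p;
  reg_equiv_len = new_max;
}

int
main (void)
{
  grow_reg_equiv_table (128);   /* initial pseudos */
  grow_reg_equiv_table (160);   /* new pseudos created by localization */
  printf ("%zu entries, memory_loc[140] = %p\n",
          reg_equiv_len, (void *) reg_equiv_table[140].memory_loc);
  return 0;
}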
+ + diff --git a/gcc/Makefile.in b/gcc/Makefile.in index ae4f4da2b3e..f49e1826d6a 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -1269,6 +1269,7 @@ OBJS = \ ipa.o \ ira.o \ ira-build.o \ + ira-reload.o \ ira-costs.o \ ira-conflicts.o \ ira-color.o \ @@ -3300,6 +3301,12 @@ ira.o: ira.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \ $(BITMAP_H) hard-reg-set.h $(BASIC_BLOCK_H) \ $(EXPR_H) $(RECOG_H) $(PARAMS_H) $(TIMEVAR_H) $(TREE_PASS_H) output.h \ $(EXCEPT_H) reload.h toplev.h $(DIAGNOSTIC_CORE_H) $(INTEGRATE_H) $(DF_H) $(GGC_H) $(IRA_INT_H) +ira-reload.o: ira-reload.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \ + $(TARGET_H) $(TM_H) $(RTL_H) $(RECOG_H) \ + $(REGS_H) hard-reg-set.h $(FLAGS_H) $(OBSTACK_H) \ + $(EXPR_H) $(BASIC_BLOCK_H) $(TOPLEV_H) $(TM_P_H) \ + $(DF_H) $(IRA_INT_H) $(PARAMS_H) $(TIMEVAR_H) $(INTEGRATE_H) \ + $(TREE_PASS_H) output.h regmove.o : regmove.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) \ insn-config.h $(TIMEVAR_H) $(TREE_PASS_H) $(DF_H) \ $(RECOG_H) output.h $(REGS_H) hard-reg-set.h $(FLAGS_H) $(FUNCTION_H) \ diff --git a/gcc/config/sparc/gas.h b/gcc/config/sparc/gas.h new file mode 100644 index 00000000000..e3779293532 --- /dev/null +++ b/gcc/config/sparc/gas.h @@ -0,0 +1,6 @@ +/* Definitions of target machine for GCC, for SPARC + using the GNU assembler. */ + +/* Switch into a generic section. */ +#undef TARGET_ASM_NAMED_SECTION +#define TARGET_ASM_NAMED_SECTION default_elf_asm_named_section diff --git a/gcc/ira-build.c b/gcc/ira-build.c index 95d6c169a27..fef2fac243b 100644 --- a/gcc/ira-build.c +++ b/gcc/ira-build.c @@ -184,7 +184,7 @@ more_one_region_p (void) /* Free the loop tree node of a loop. */ static void -finish_loop_tree_node (ira_loop_tree_node_t loop) +finish_loop_tree_node (ira_loop_tree_node_t loop, bool delete_regno_allocno_map) { if (loop->regno_allocno_map != NULL) { @@ -193,8 +193,11 @@ finish_loop_tree_node (ira_loop_tree_node_t loop) ira_free_bitmap (loop->border_allocnos); ira_free_bitmap (loop->modified_regnos); ira_free_bitmap (loop->all_allocnos); - ira_free (loop->regno_allocno_map); - loop->regno_allocno_map = NULL; + if (delete_regno_allocno_map) + { + ira_free (loop->regno_allocno_map); + loop->regno_allocno_map = NULL; + } } } @@ -206,7 +209,7 @@ finish_loop_tree_nodes (void) loop_p loop; FOR_EACH_VEC_ELT (loop_p, ira_loops.larray, i, loop) - finish_loop_tree_node (&ira_loop_nodes[i]); + finish_loop_tree_node (&ira_loop_nodes[i], false); ira_free (ira_loop_nodes); for (i = 0; i < (unsigned int) last_basic_block_before_change; i++) { @@ -218,8 +221,6 @@ finish_loop_tree_nodes (void) ira_free_bitmap (ira_bb_nodes[i].modified_regnos); if (ira_bb_nodes[i].all_allocnos != NULL) ira_free_bitmap (ira_bb_nodes[i].all_allocnos); - if (ira_bb_nodes[i].regno_allocno_map != NULL) - ira_free (ira_bb_nodes[i].regno_allocno_map); } ira_free (ira_bb_nodes); } @@ -668,6 +669,53 @@ ira_allocate_object_conflicts (ira_object_t obj, int num) allocate_conflict_bit_vec (obj); } +/* Remove OBJ2 from the conflicts of OBJ1. 
*/ +static void +remove_allocno_conflicts (ira_object_t obj1, ira_object_t obj2) +{ + int num, i; + + if (OBJECT_CONFLICT_VEC_P (obj1)) + { + ira_object_t *vec + = (ira_object_t *) OBJECT_CONFLICT_VEC (obj1); + num = OBJECT_NUM_CONFLICTS (obj1) + 2; + + for (i = 0; i < num; i++) + if (vec[i] == obj2) + { + num--; + if (i != num - 2) + vec[i] = vec[num - 2]; + vec[num - 2] = NULL; + OBJECT_NUM_CONFLICTS (obj1)--; + } + } + else + { + int id = OBJECT_CONFLICT_ID (obj2); + IRA_INT_TYPE *vec; + + if (id < OBJECT_MIN (obj1) || id > OBJECT_MAX (obj1)) + return; + + vec = (IRA_INT_TYPE *) OBJECT_CONFLICT_ARRAY (obj1); + CLEAR_MINMAX_SET_BIT (vec, id, OBJECT_MIN (obj1), OBJECT_MAX (obj1)); + } +} + +/* Remove A from all conflicts. */ +void +remove_from_all_conflicts (ira_object_t to_remove) +{ + ira_object_conflict_iterator oci; + ira_object_t o; + + FOR_EACH_OBJECT_CONFLICT (to_remove, o, oci) + remove_allocno_conflicts (o, to_remove); +} + + /* Add OBJ2 to the conflicts of OBJ1. */ static void add_to_conflicts (ira_object_t obj1, ira_object_t obj2) @@ -753,7 +801,7 @@ add_to_conflicts (ira_object_t obj1, ira_object_t obj2) } /* Add OBJ1 to the conflicts of OBJ2 and vice versa. */ -static void +void ira_add_conflict (ira_object_t obj1, ira_object_t obj2) { add_to_conflicts (obj1, obj2); @@ -1398,6 +1446,11 @@ initiate_cost_vectors (void) sizeof (int) * ira_class_hard_regs_num[aclass], 100); } + cost_vector_pool[NO_REGS] + = create_alloc_pool ("cost vectors", + sizeof (int) + * ira_class_hard_regs_num[NO_REGS], + 100); } /* Allocate and return a cost vector VEC for ACLASS. */ @@ -2239,7 +2292,7 @@ remove_unnecessary_regions (bool all_p) else remove_unnecessary_allocnos (); while (VEC_length (ira_loop_tree_node_t, removed_loop_vec) > 0) - finish_loop_tree_node (VEC_pop (ira_loop_tree_node_t, removed_loop_vec)); + finish_loop_tree_node (VEC_pop (ira_loop_tree_node_t, removed_loop_vec), true); VEC_free (ira_loop_tree_node_t, heap, removed_loop_vec); } @@ -3046,7 +3099,7 @@ ira_build (bool loops_p) create_loop_tree_nodes (loops_p); form_loop_tree (); create_allocnos (); - ira_costs (); + ira_costs (FIRST_PSEUDO_REGISTER); create_allocno_objects (); ira_create_allocno_live_ranges (); remove_unnecessary_regions (false); diff --git a/gcc/ira-color.c b/gcc/ira-color.c index 2b21fdd8ad2..9dee462f2c6 100644 --- a/gcc/ira-color.c +++ b/gcc/ira-color.c @@ -164,7 +164,7 @@ static bitmap coloring_allocno_bitmap; static bitmap consideration_allocno_bitmap; /* All allocnos sorted according their priorities. */ -static ira_allocno_t *sorted_allocnos; +ira_allocno_t *sorted_allocnos; /* Vec representing the stack of allocnos used during coloring. */ static VEC(ira_allocno_t,heap) *allocno_stack_vec; @@ -2329,7 +2329,7 @@ static int *allocno_priorities; /* Set up priorities for N allocnos in array CONSIDERATION_ALLOCNOS. */ -static void +void setup_allocno_priorities (ira_allocno_t *consideration_allocnos, int n) { int i, length, nrefs, priority, max_priority, mult; @@ -4052,7 +4052,8 @@ ira_reassign_pseudos (int *spilled_pseudo_regs, int num, HARD_REG_SET bad_spill_regs, HARD_REG_SET *pseudo_forbidden_regs, HARD_REG_SET *pseudo_previous_regs, - bitmap spilled) + bitmap spilled, + bool reassign_conflict_pseudos) { int i, n, regno; bool changed_p; @@ -4064,35 +4065,36 @@ ira_reassign_pseudos (int *spilled_pseudo_regs, int num, SPILLED_PSEUDO_REGS to SPILLED_PSEUDO_REGS. This is preferable to allocating in two steps as some of the conflicts might have a higher priority than the pseudos passed in SPILLED_PSEUDO_REGS. 
*/ - for (i = 0; i < num; i++) - bitmap_set_bit (temp, spilled_pseudo_regs[i]); - - for (i = 0, n = num; i < n; i++) + if (reassign_conflict_pseudos) { - int nr, j; - int regno = spilled_pseudo_regs[i]; - bitmap_set_bit (temp, regno); + for (i = 0; i < num; i++) + bitmap_set_bit (temp, spilled_pseudo_regs[i]); - a = ira_regno_allocno_map[regno]; - nr = ALLOCNO_NUM_OBJECTS (a); - for (j = 0; j < nr; j++) + for (i = 0, n = num; i < n; i++) { - ira_object_t conflict_obj; - ira_object_t obj = ALLOCNO_OBJECT (a, j); - ira_object_conflict_iterator oci; + int nr, j; + int regno = spilled_pseudo_regs[i]; + bitmap_set_bit (temp, regno); - FOR_EACH_OBJECT_CONFLICT (obj, conflict_obj, oci) + a = ira_regno_allocno_map[regno]; + nr = ALLOCNO_NUM_OBJECTS (a); + for (j = 0; j < nr; j++) { - ira_allocno_t conflict_a = OBJECT_ALLOCNO (conflict_obj); - if (ALLOCNO_HARD_REGNO (conflict_a) < 0 - && ! ALLOCNO_DONT_REASSIGN_P (conflict_a) - && bitmap_set_bit (temp, ALLOCNO_REGNO (conflict_a))) - { - spilled_pseudo_regs[num++] = ALLOCNO_REGNO (conflict_a); - /* ?!? This seems wrong. */ - bitmap_set_bit (consideration_allocno_bitmap, - ALLOCNO_NUM (conflict_a)); - } + ira_object_t conflict_obj; + ira_object_t obj = ALLOCNO_OBJECT (a, j); + ira_object_conflict_iterator oci; + + FOR_EACH_OBJECT_CONFLICT (obj, conflict_obj, oci) + { + ira_allocno_t conflict_a = OBJECT_ALLOCNO (conflict_obj); + if (ALLOCNO_HARD_REGNO (conflict_a) < 0 + && ! ALLOCNO_DONT_REASSIGN_P (conflict_a) + && ! bitmap_bit_p (temp, ALLOCNO_REGNO (conflict_a))) + { + spilled_pseudo_regs[num++] = ALLOCNO_REGNO (conflict_a); + bitmap_set_bit (temp, ALLOCNO_REGNO (conflict_a)); + } + } } } } diff --git a/gcc/ira-conflicts.c b/gcc/ira-conflicts.c index 225a3ab17f2..5601f9df8b4 100644 --- a/gcc/ira-conflicts.c +++ b/gcc/ira-conflicts.c @@ -419,7 +419,7 @@ process_regs_for_copy (rtx reg1, rtx reg2, bool constraint_p, { ira_allocno_t a1 = ira_curr_regno_allocno_map[REGNO (reg1)]; ira_allocno_t a2 = ira_curr_regno_allocno_map[REGNO (reg2)]; - if (!allocnos_conflict_for_copy_p (a1, a2) && offset1 == offset2) + if (!conflicts || (!allocnos_conflict_for_copy_p (a1, a2) && offset1 == offset2)) { cp = ira_add_allocno_copy (a1, a2, freq, constraint_p, insn, ira_curr_loop_tree_node); @@ -508,10 +508,10 @@ add_insn_allocno_copies (rtx insn) if ((set = single_set (insn)) != NULL_RTX && REG_SUBREG_P (SET_DEST (set)) && REG_SUBREG_P (SET_SRC (set)) && ! side_effects_p (set) - && find_reg_note (insn, REG_DEAD, + && (! conflicts || find_reg_note (insn, REG_DEAD, REG_P (SET_SRC (set)) ? SET_SRC (set) - : SUBREG_REG (SET_SRC (set))) != NULL_RTX) + : SUBREG_REG (SET_SRC (set))) != NULL_RTX)) { process_regs_for_copy (SET_DEST (set), SET_SRC (set), false, insn, freq); @@ -562,8 +562,8 @@ add_insn_allocno_copies (rtx insn) } /* Add copies originated from BB given by LOOP_TREE_NODE. */ -static void -add_copies (ira_loop_tree_node_t loop_tree_node) +void +ira_add_copies (ira_loop_tree_node_t loop_tree_node) { basic_block bb; rtx insn; @@ -859,7 +859,7 @@ ira_build_conflicts (void) ira_object_iterator oi; build_conflicts (); - ira_traverse_loop_tree (true, ira_loop_tree_root, NULL, add_copies); + ira_traverse_loop_tree (true, ira_loop_tree_root, NULL, ira_add_copies); /* We need finished conflict table for the subsequent call. 
*/ if (flag_ira_region == IRA_REGION_ALL || flag_ira_region == IRA_REGION_MIXED) @@ -873,6 +873,7 @@ ira_build_conflicts (void) ira_free (conflicts[OBJECT_CONFLICT_ID (obj)]); } ira_free (conflicts); + conflicts = NULL; } } base = base_reg_class (VOIDmode, ADDR_SPACE_GENERIC, ADDRESS, SCRATCH); diff --git a/gcc/ira-costs.c b/gcc/ira-costs.c index 4fa12a2ae19..4bd822ddd5b 100644 --- a/gcc/ira-costs.c +++ b/gcc/ira-costs.c @@ -1480,7 +1480,7 @@ process_bb_node_for_costs (ira_loop_tree_node_t loop_tree_node) and their best costs. Set up preferred, alternative and allocno classes for pseudos. */ static void -find_costs_and_classes (FILE *dump_file) +find_costs_and_classes (FILE *dump_file, int min_regno) { int i, k, start, max_cost_classes_num; int pass; @@ -1716,7 +1716,8 @@ find_costs_and_classes (FILE *dump_file) best = alt_class = NO_REGS; else if (best == alt_class) alt_class = NO_REGS; - setup_reg_classes (i, best, alt_class, regno_aclass[i]); + if (i >= min_regno) + setup_reg_classes (i, best, alt_class, regno_aclass[i]); if ((!allocno_p || internal_flag_ira_verbose > 2) && dump_file != NULL) fprintf (dump_file, @@ -1883,7 +1884,7 @@ process_bb_node_for_hard_reg_moves (ira_loop_tree_node_t loop_tree_node) its class and modify hard register cost because insns moving allocno to/from hard registers. */ static void -setup_allocno_class_and_costs (void) +setup_allocno_class_and_costs (int min_regno) { int i, j, n, regno, hard_regno, num; int *reg_costs; @@ -1895,9 +1896,15 @@ setup_allocno_class_and_costs (void) ira_assert (allocno_p); FOR_EACH_ALLOCNO (a, ai) { + if (ALLOCNO_REGNO (a) < min_regno) + continue; + i = ALLOCNO_NUM (a); regno = ALLOCNO_REGNO (a); - aclass = regno_aclass[regno]; + if (min_regno == FIRST_PSEUDO_REGISTER) + aclass = regno_aclass[regno]; + else + aclass = ALLOCNO_CLASS (a); cost_classes_ptr = regno_cost_classes[regno]; ira_assert (pref[i] == NO_REGS || aclass != NO_REGS); ALLOCNO_MEMORY_COST (a) = COSTS (costs, i)->mem_cost; @@ -2032,7 +2039,7 @@ finish_costs (void) /* Entry function which defines register class, memory and hard register costs for each allocno. */ void -ira_costs (void) +ira_costs (int min_regno) { allocno_p = true; cost_elements_num = ira_allocnos_num; @@ -2040,9 +2047,10 @@ ira_costs (void) total_allocno_costs = (struct costs *) ira_allocate (max_struct_costs_size * ira_allocnos_num); initiate_regno_cost_classes (); - calculate_elim_costs_all_insns (); - find_costs_and_classes (ira_dump_file); - setup_allocno_class_and_costs (); + if (min_regno == FIRST_PSEUDO_REGISTER) + calculate_elim_costs_all_insns (); + find_costs_and_classes (ira_dump_file, min_regno); + setup_allocno_class_and_costs (min_regno); finish_regno_cost_classes (); finish_costs (); ira_free (total_allocno_costs); @@ -2057,7 +2065,7 @@ ira_set_pseudo_classes (FILE *dump_file) cost_elements_num = max_reg_num (); init_costs (); initiate_regno_cost_classes (); - find_costs_and_classes (dump_file); + find_costs_and_classes (dump_file, FIRST_PSEUDO_REGISTER); finish_regno_cost_classes (); pseudo_classes_defined_p = true; finish_costs (); diff --git a/gcc/ira-int.h b/gcc/ira-int.h index 1db9b411e0b..8f66d93e72c 100644 --- a/gcc/ira-int.h +++ b/gcc/ira-int.h @@ -938,6 +938,7 @@ extern void ira_print_disposition (FILE *); extern void ira_debug_disposition (void); extern void ira_debug_allocno_classes (void); extern void ira_init_register_move_cost (enum machine_mode); +extern void expand_reg_info (int); /* The length of the two following arrays. 
*/ extern int ira_reg_equiv_len; @@ -995,12 +996,15 @@ extern void ira_free_cost_vector (int *, reg_class_t); extern void ira_flattening (int, int); extern bool ira_build (bool); extern void ira_destroy (void); +extern void remove_from_all_conflicts (ira_object_t to_remove); +extern void ira_add_conflict (ira_object_t, ira_object_t); + /* ira-costs.c */ extern void ira_init_costs_once (void); extern void ira_init_costs (void); extern void ira_finish_costs_once (void); -extern void ira_costs (void); +extern void ira_costs (int); extern void ira_tune_allocno_costs (void); /* ira-lives.c */ @@ -1017,6 +1021,7 @@ extern void ira_finish_allocno_live_ranges (void); /* ira-conflicts.c */ extern void ira_debug_conflicts (bool); extern void ira_build_conflicts (void); +extern void ira_add_copies (ira_loop_tree_node_t); /* ira-color.c */ extern void ira_debug_hard_regs_forest (void); @@ -1025,12 +1030,15 @@ extern void ira_reassign_conflict_allocnos (int); extern void ira_initiate_assign (void); extern void ira_finish_assign (void); extern void ira_color (void); +extern void setup_allocno_priorities (ira_allocno_t *, int); /* ira-emit.c */ extern void ira_initiate_emit_data (void); extern void ira_finish_emit_data (void); extern void ira_emit (bool); +/* ira-reload.c */ +extern void ira_reload (void); /* Initialize register costs for MODE if necessary. */ diff --git a/gcc/ira-reload.c b/gcc/ira-reload.c new file mode 100644 index 00000000000..0a444c88a86 --- /dev/null +++ b/gcc/ira-reload.c @@ -0,0 +1,1595 @@ +/* Integrated Register Allocator (IRA) reloading . + Copyright (C) 2009, 2010, 2011 + Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "regs.h" +#include "rtl.h" +#include "tm_p.h" +#include "target.h" +#include "flags.h" +#include "obstack.h" +#include "bitmap.h" +#include "hard-reg-set.h" +#include "basic-block.h" +#include "expr.h" +#include "recog.h" +#include "params.h" +#include "timevar.h" +#include "tree-pass.h" +#include "output.h" +#include "except.h" +#include "reload.h" +#include "errors.h" +#include "integrate.h" +#include "df.h" +#include "ggc.h" +#include "ira-int.h" + + +static bitmap pseudos_to_localize; +static bitmap regs_to_load; +static bitmap regs_to_load_alt; +static bitmap regs_to_store; +static bitmap regs_to_store_alt; +static int *pseudo_nuses; +static int *pseudo_nsets; +static rtx *reg_map; +static rtx *alt_reg_map; + +/* Return true if REG is a pseudo which should be localized, return false + otherwise. */ + +static bool +localize_pseudo_p (unsigned int regno) +{ + /* If this pseudo got a hard register, then we obviously do not want to + localize it. */ + if (reg_renumber [regno] != -1) + return false; + + /* Avoid localizing a pseudo which can be rematerialized. + ?!? I think this is a holdover from ancient code and may no longer + be necessary. 
*/ + if ((reg_equivs && VEC_index (reg_equivs_t, reg_equivs, regno)->constant) + || (reg_equivs && VEC_index (reg_equivs_t, reg_equivs, regno)->invariant)) + return false; + + /* If we don't know what register class to use for the psuedo, then + we don't try to localize it. + ?!? ISTM we should go ahead and localize as the localized pseudo + should be easier for reload to handle. */ + if (reg_preferred_class (regno) == NO_REGS) + return false; + + /* If the pseudo is already local to a block, then there's no point + in localizing it. */ + if (REG_BASIC_BLOCK (regno) != REG_BLOCK_GLOBAL + && REG_BASIC_BLOCK (regno) != REG_BLOCK_UNKNOWN) + return false; + + return true; +} + + +/* Alter each pseudo-reg rtx to contain its hard reg number. Assign + stack slots to the pseudos that lack hard regs or equivalents. + Do not touch virtual registers. + + ?!? This assigns for local pseudos too, which might be wasteful + as we get another chance to allocate those. */ +static void +assign_stack_slots (void) +{ + unsigned i, n, max_regno = max_reg_num (); + int *temp_pseudo_reg_arr; + + /* ?!? This should be cleaned up or go away. */ + ira_spilled_reg_stack_slots_num = 0; + + + temp_pseudo_reg_arr = XNEWVEC (int, max_regno - LAST_VIRTUAL_REGISTER - 1); + n = 0; + for (n = 0, i = LAST_VIRTUAL_REGISTER + 1; i < max_regno; i++) + temp_pseudo_reg_arr[n++] = i; + + /* Ask IRA to order pseudo-registers for better stack slot + sharing. */ + ira_sort_regnos_for_alter_reg (temp_pseudo_reg_arr, n, reg_max_ref_width); + + for (i = 0; i < n; i++) + alter_reg (temp_pseudo_reg_arr[i], -1, false); + + free (temp_pseudo_reg_arr); +} + + +/* Count uses of USE (into pseudo_nuses) if USE is a register marked for + localization. */ + +static void +identify_singleton_uses (rtx use, bitmap subregs_to_decompose, int df_flags) +{ + if (GET_CODE (use) == SUBREG) + use = SUBREG_REG (use); + + if (GET_CODE (use) != REG) + return; + + /* If this reference is an implicit use via a subreg and the referenced + subreg is marked for decomposition, then we can ignore the implicit + use as it will disappear when the subreg is decomposed. */ + if (((df_flags & (DF_REF_SUBREG | DF_REF_READ_WRITE)) + == (DF_REF_SUBREG | DF_REF_READ_WRITE)) + && bitmap_bit_p (subregs_to_decompose, REGNO (use))) + return; + + if (bitmap_bit_p (pseudos_to_localize, REGNO (use))) + pseudo_nuses[REGNO (use)]++; +} + +/* Count assignments to DEST (into pseudo_nsets) if DEST is a register marked + for localization. */ + +static void +identify_singleton_sets (rtx dest) +{ + if (GET_CODE (dest) == SUBREG) + dest = SUBREG_REG (dest); + + if (GET_CODE (dest) != REG) + return; + + /* If DEST isn't maked for spilling, then there is nothing to do. */ + if (bitmap_bit_p (pseudos_to_localize, REGNO (dest))) + pseudo_nsets[REGNO (dest)]++; +} + +/* Return true if there are no uses of DEST between (START, STOP]. */ + +static bool +no_uses_after_this_set (rtx dest, rtx start, rtx stop) +{ + rtx insn; + + for (insn = NEXT_INSN (start); + insn != NEXT_INSN (BB_END (BLOCK_FOR_INSN (stop))); + insn = NEXT_INSN (insn)) + { + df_ref *use_rec; + rtx use = NULL_RTX; + + if (!NONDEBUG_INSN_P (insn)) + continue; + + for (use_rec = DF_INSN_USES (insn); *use_rec; use_rec++) + { + use = DF_REF_REG (*use_rec); + if (GET_CODE (use) == SUBREG) + use = SUBREG_REG (use); + + if (use == dest) + return false; + } + } + return true; +} + +/* Collect (into REGS_TO_LOAD) USE if it is a pseudo marked for + localization. 
*/ + +static void +collect_loads (rtx use, bitmap subregs_to_decompose, int df_flags) +{ + rtx orig = use; + bool alt = false; + + if (GET_CODE (use) == SUBREG) + use = SUBREG_REG (use); + + if (GET_CODE (use) != REG) + return; + + /* If this reference is an implicit use via a subreg and the referenced + subreg is marked for decomposition, then we can ignore the implicit + use as it will disappear when the subreg is decomposed. */ + if (((df_flags & (DF_REF_SUBREG | DF_REF_READ_WRITE)) + == (DF_REF_SUBREG | DF_REF_READ_WRITE)) + && bitmap_bit_p (subregs_to_decompose, REGNO (use))) + return; + + /* If this is the high word access of a double word pseudo that was marked + for decomposition, then use the _alt arrays as needed. */ + if (GET_CODE (orig) == SUBREG + && GET_MODE_SIZE (GET_MODE (orig)) < GET_MODE_SIZE (GET_MODE (use)) + && bitmap_bit_p (subregs_to_decompose, REGNO (use)) + && SUBREG_BYTE (orig) != 0) + alt = true; + + if (bitmap_bit_p (pseudos_to_localize, REGNO (use))) + bitmap_set_bit ((alt ? regs_to_load_alt : regs_to_load), REGNO (use)); + + return; +} + +/* If USE refers to a pseudo marked in REGS_TO_LOAD, emit a load of + the pseudo before INSN. */ + +static int +emit_localizing_loads (rtx use, + rtx insn, + bitmap subregs_to_decompose, + int df_flags) +{ + rtx orig = use; + bool alt = false; + bool decompose = false; + + if (GET_CODE (use) == SUBREG) + use = SUBREG_REG (use); + + if (GET_CODE (use) != REG) + return 0; + + /* If this reference is an implicit use via a subreg and the referenced + subreg is marked for decomposition, then we can ignore the implicit + use as it will disappear when the subreg is decomposed. */ + if (((df_flags & (DF_REF_SUBREG | DF_REF_READ_WRITE)) + == (DF_REF_SUBREG | DF_REF_READ_WRITE)) + && bitmap_bit_p (subregs_to_decompose, REGNO (use))) + return 0; + + /* If this is word access of a double word pseudo that was marked for + decomposition, then decompose the double-word access to single word + accesses. */ + if (GET_CODE (orig) == SUBREG + && GET_MODE_SIZE (GET_MODE (orig)) < GET_MODE_SIZE (GET_MODE (use)) + && bitmap_bit_p (subregs_to_decompose, REGNO (use))) + decompose = true; + + /* If this is the high word access of a double word pseudo that was marked + for decomposition, then use the _alt arrays as needed. */ + if (decompose && SUBREG_BYTE (orig) != 0) + alt = true; + + if (bitmap_bit_p (pseudos_to_localize, REGNO (use))) + { + /* If this pseudo still needs a load, emit it. */ + if (bitmap_bit_p ((alt ? regs_to_load_alt : regs_to_load), REGNO (use))) + { + rtx insns, temp; + rtx mem + = VEC_index (reg_equivs_t, reg_equivs, REGNO (use))->memory_loc; + int nuses = pseudo_nuses[REGNO (use)]; + int nsets = pseudo_nsets[REGNO (use)]; + int occurrences = count_occurrences (PATTERN (insn), use, 0); + + mem = copy_rtx (mem); + + /* If we're decomposing a SUBREG, then the memory address needs + adjustment. */ + if (decompose) + mem = adjust_address_nv (mem, GET_MODE (orig), SUBREG_BYTE (orig)); + + /* validate_replace_rtx internally calls df_insn_rescan, which is + unsafe as our caller is iterating over the existing DF info. So + we have to turn off insn rescanning temporarily. */ + df_set_flags (DF_NO_INSN_RESCAN); + + /* If this insn has all the uses of a pseudo we want to localize + and the pseudo is never set, then try to replace the pseudo + with its equivalent memory location. */ + if (nsets == 0 + && (occurrences == nuses || nuses == 2) + && validate_replace_rtx ((decompose ? 
orig : use), mem, insn)) + { + df_clear_flags (DF_NO_INSN_RESCAN); + } + else + { + /* Create a new pseudo and record it in our map. */ + if ((alt ? alt_reg_map[(REGNO (use))] : reg_map [(REGNO (use))]) + == NULL) + { + if (alt) + alt_reg_map [REGNO (use)] = gen_reg_rtx (GET_MODE (orig)); + else + reg_map [REGNO (use)]= gen_reg_rtx ((decompose + ? GET_MODE (orig) + : GET_MODE (use))); + } + + df_clear_flags (DF_NO_INSN_RESCAN); + start_sequence (); + emit_move_insn ((alt + ? alt_reg_map [REGNO (use)] + : reg_map [REGNO (use)]), + mem); + insns = get_insns(); + end_sequence (); + emit_insn_before (insns, insn); + + /* Inform the DF framework about the new insns. */ + for (temp = insns; temp != insn; temp = NEXT_INSN (insns)) + df_insn_rescan (temp); + + /* Note it is no longer necessary to load this pseudo. */ + bitmap_clear_bit ((alt ? regs_to_load_alt : regs_to_load), + REGNO (use)); + } + } + + /* Replace the original pseudo with the new one. */ + if ((alt ? alt_reg_map [REGNO (use)] : reg_map [REGNO (use)])) + replace_rtx (insn, + (decompose ? orig : use), + (alt ? alt_reg_map [REGNO (use)] : reg_map [REGNO (use)])); + return 1; + } + return 0; +} + +/* DEST is an output for INSN. If the output is marked + for localizing, then we need to rename it to the new block-local + pseudo. This finishes the localization of unallocated globals. */ + +static int +rename_sets (rtx dest, rtx insn, bitmap subregs_to_decompose, int df_flags) +{ + rtx orig = dest; + bool decompose = false; + bool alt = false; + + if (GET_CODE (dest) == SUBREG) + dest = SUBREG_REG (dest); + + if (GET_CODE (dest) != REG) + return 0; + + /* If this is word access of a double word pseudo that was marked for + decomposition, then decompose the double-word access to single word + accesses. */ + if (GET_CODE (orig) == SUBREG + && GET_MODE_SIZE (GET_MODE (orig)) < GET_MODE_SIZE (GET_MODE (dest)) + && bitmap_bit_p (subregs_to_decompose, REGNO (dest))) + decompose = true; + + /* If this is the high word access of a double word pseudo that was marked + for decomposition, then use the _alt arrays as needed. */ + if (decompose && SUBREG_BYTE (orig) != 0) + alt = true; + + /* If DEST isn't maked for spilling, then there is nothing to do. */ + if (! bitmap_bit_p (pseudos_to_localize, REGNO (dest))) + return 0; + + /* A store can be viewed as a store followed by a load, so we can clear DEST + from REGS_TO_LOAD, but not if this was a partial store. */ + if (GET_CODE (orig) == STRICT_LOW_PART) + { + /* This must be treated as a USE too. + ?!? Does this need to integrated with the use processing? */ + emit_localizing_loads (dest, insn, subregs_to_decompose, df_flags); + } + else if (GET_CODE (orig) == SUBREG + && (GET_MODE_SIZE (GET_MODE (orig))) + < GET_MODE_SIZE (GET_MODE (dest))) + { + /* This must be treated as a USE too. + ?!? Does this need to integrated with the use processing? */ + if (!decompose) + emit_localizing_loads (dest, insn, subregs_to_decompose, df_flags); + } + + /* ?!? I'm not entirely sure this can still happen. */ + if ((alt ? alt_reg_map [(REGNO (dest))] : reg_map [(REGNO (dest))]) == NULL) + { + if (alt) + alt_reg_map [REGNO (dest)] = gen_reg_rtx (GET_MODE (orig)); + else + reg_map [REGNO (dest)] = gen_reg_rtx (decompose + ? GET_MODE (orig) + : GET_MODE (dest)); + } + + replace_rtx (insn, + (decompose ? orig : dest), + (alt ? alt_reg_map [REGNO (dest)] : reg_map [REGNO (dest)])); + return 1; +} + +/* Store each pseudo set by the current insn that is + marked for localizing into memory after INSN. 
+ + Return 0 if INSN does not need rescanning. + + Return 1 if INSN needs rescanning. + + Return -1 if INSN should be deleted. */ + +static int +emit_localizing_stores (rtx dest, + rtx insn, + rtx tail, + bitmap subregs_to_decompose) +{ + unsigned int regno; + int retval = 0; + rtx insns; + rtx orig = dest; + bool decompose = false; + bool alt = false; + + if (GET_CODE (dest) == SUBREG) + dest = SUBREG_REG (dest); + + /* If the output isn't a register, then there's nothing to do. */ + if (GET_CODE (dest) != REG) + return retval; + + regno = REGNO (dest); + + /* If the register isn't marked for localization, then there's nothing + to do. */ + if (! bitmap_bit_p (pseudos_to_localize, regno)) + return retval; + + /* If this is word access of a double word pseudo that was marked for + decomposition, then decompose the double-word access to single word + accesses. */ + if (GET_CODE (orig) == SUBREG + && GET_MODE_SIZE (GET_MODE (orig)) < GET_MODE_SIZE (GET_MODE (dest)) + && bitmap_bit_p (subregs_to_decompose, regno)) + decompose = true; + + /* If this is the high word access of a double word pseudo that was marked + for decomposition, then use the _alt arrays as needed. */ + if (decompose && SUBREG_BYTE (orig) != 0) + alt = true; + + /* IF this register is marked for decomposition and INSN is a naked + CLOBBER, then mark INSN for deletion since it's not needed anymore. */ + if (bitmap_bit_p (subregs_to_decompose, regno) + && GET_CODE (PATTERN (insn)) == CLOBBER) + retval = -1; + + /* DEST is marked for spilling, if we have not emitted a spill store yet for + DEST, then do so now. Note we do not change INSN at this time. */ + if (bitmap_bit_p ((alt ? regs_to_store_alt : regs_to_store), regno)) + { + int nuses = pseudo_nuses[REGNO (dest)]; + int nsets = pseudo_nsets[REGNO (dest)]; + int occurrences = count_occurrences (PATTERN (insn), dest, 0); + + /* We must copy the memory location to avoid incorrect RTL sharing. */ + rtx mem = VEC_index (reg_equivs_t, reg_equivs, regno)->memory_loc; + + /* If this is unchanging memory, then just remove this insn. */ + if (MEM_READONLY_P (mem)) + return -1; + + mem = copy_rtx (mem); + + /* If we're decomposing a SUBREG, then the memory address needs + adjustment. */ + if (decompose) + mem = adjust_address_nv (mem, GET_MODE (orig), SUBREG_BYTE (orig)); + + /* Note that we have stored this register so that we don't try to + store it again. */ + bitmap_clear_bit ((alt ? regs_to_store_alt : regs_to_store), regno); + + /* validate_replace_rtx internally calls df_insn_rescan, which is + unsafe as our caller is iterating over the existing DF info. So + we have to turn off insn rescanning temporarily. */ + df_set_flags (DF_NO_INSN_RESCAN); + + /* If this insn both uses and sets a pseudo we want to localize and + contains all the uses and sets, then try to replace the pseudo + with its equivalent memory location. */ + if (nuses + && nsets == 1 + && (occurrences == nuses + || (occurrences == 1 + && nuses == 2 + && ! no_uses_after_this_set (dest, insn, tail))) + && validate_replace_rtx ((decompose ? orig : dest), mem, insn)) + { + pseudo_nsets[REGNO (dest)]--; + df_clear_flags (DF_NO_INSN_RESCAN); + retval = 1; + } + /* Similarly if this insn sets a pseudo we want to localize and + there are no uses after this set, then try to replace the pseudo + with its equivalent memory location. */ + else if ((nsets == 1 || nuses == 0) + && no_uses_after_this_set (dest, insn, tail) + && validate_replace_rtx ((decompose ? 
orig : dest), mem, insn)) + { + pseudo_nsets[REGNO (dest)]--; + df_clear_flags (DF_NO_INSN_RESCAN); + retval = 1; + } + else + { + df_clear_flags (DF_NO_INSN_RESCAN); + start_sequence (); + emit_move_insn (mem, (decompose ? orig : dest)); + insns = get_insns(); + end_sequence (); + + /* If the pseudo is being set from its equivalent memory location + and is unused from this point until the end of this block, then + we don't need to store the pseudo back to memory back, we + actually want to delete INSN. */ + if (NEXT_INSN (insns) == 0 + && single_set (insns) + && single_set (insn) + && rtx_equal_p (SET_SRC (single_set (insn)), + SET_DEST (single_set (insns)))) + { + if (no_uses_after_this_set (dest, insn, tail)) + retval = -1; + } + else + { + rtx temp; + /* Inform the DF framework about the new insns. */ + for (temp = insns; temp; temp = NEXT_INSN (temp)) + df_insn_rescan (temp); + + emit_insn_after_noloc (insns, insn, NULL); + } + } + } + + /* A store can be viewed as a store followed by a load, so we can clear DEST + from REGS_TO_LOAD, but not if this was a partial store. */ + if (GET_CODE (orig) == STRICT_LOW_PART) + bitmap_set_bit (regs_to_load, regno); + else if (GET_CODE (orig) == SUBREG + && (GET_MODE_SIZE (GET_MODE (orig)) + < GET_MODE_SIZE (GET_MODE (dest)))) + bitmap_set_bit ((alt ? regs_to_load_alt : regs_to_load), regno); + else if (retval == 0) + bitmap_clear_bit ((alt ? regs_to_load_alt : regs_to_load), REGNO (dest)); + return retval; +} + +static ira_allocno_t +create_new_allocno_for_spilling (int nreg, int oreg) +{ + ira_allocno_t to, from, a; + ira_allocno_iterator ai; + + /* Update IRA's datastructures. */ + + /* First create the allocno. */ + to = ira_create_allocno (nreg, true, ira_curr_loop_tree_node); + + /* This must occur before creating objects so that we know how many + objects to create. */ + from = ira_regno_allocno_map [oreg]; + ALLOCNO_CLASS (to) = ALLOCNO_CLASS (from); + + ira_create_allocno_objects (to); + + /* Now lengthen the regno->allocno map. */ + ira_free (ira_regno_allocno_map); + ira_regno_allocno_map + = (ira_allocno_t *) ira_allocate (max_reg_num () * sizeof (ira_allocno_t)); + memset (ira_regno_allocno_map, 0, max_reg_num () * sizeof (ira_allocno_t)); + FOR_EACH_ALLOCNO (a, ai) + { + unsigned int regno = ALLOCNO_REGNO (a); + ira_regno_allocno_map[regno] = a; + } + + return to; +} + + +static void +copy_allocno_for_spilling (int nreg, int oreg) +{ + ira_allocno_t to, from; + unsigned int conflicts, n, i; + live_range_t prev, range, r; + ira_object_conflict_iterator oci; + ira_object_t o; + + if (ira_dump_file != NULL) + fprintf (ira_dump_file, "Splitting range for %d using new reg %d.\n", + oreg, nreg); + + to = create_new_allocno_for_spilling (nreg, oreg); + from = ira_regno_allocno_map [oreg]; + + /* Copy various fields from the original allocno to the new one. 
*/ +#ifdef STACK_REGS + ALLOCNO_NO_STACK_REG_P (to) = ALLOCNO_NO_STACK_REG_P (from); + ALLOCNO_TOTAL_NO_STACK_REG_P (to) = ALLOCNO_TOTAL_NO_STACK_REG_P (from); +#endif + ALLOCNO_NREFS (to) = ALLOCNO_NREFS (from); + ALLOCNO_FREQ (to) = REG_FREQ (nreg); + ALLOCNO_CALL_FREQ (to) = 0; + ALLOCNO_CALLS_CROSSED_NUM (to) = 0; + ALLOCNO_EXCESS_PRESSURE_POINTS_NUM (to) + = ALLOCNO_EXCESS_PRESSURE_POINTS_NUM (from); + ALLOCNO_BAD_SPILL_P (to) = ALLOCNO_BAD_SPILL_P (from); + + ALLOCNO_CLASS_COST (to) = ALLOCNO_CLASS_COST (from); + ALLOCNO_MEMORY_COST (to) = ALLOCNO_MEMORY_COST (from); + ALLOCNO_UPDATED_MEMORY_COST (to) = ALLOCNO_UPDATED_MEMORY_COST (from); + ALLOCNO_NEXT_REGNO_ALLOCNO (to) = NULL; + + n = ALLOCNO_NUM_OBJECTS (to); + for (i = 0; i < n; i++) + { + ira_object_t obj = ALLOCNO_OBJECT (to, i); + /* We recompute these fields after we have localized an entire block. */ + COPY_HARD_REG_SET (OBJECT_CONFLICT_HARD_REGS (obj), + ira_no_alloc_regs); + COPY_HARD_REG_SET (OBJECT_TOTAL_CONFLICT_HARD_REGS (obj), + ira_no_alloc_regs); + IOR_COMPL_HARD_REG_SET (OBJECT_CONFLICT_HARD_REGS (obj), + reg_class_contents[ALLOCNO_CLASS (to)]); + IOR_COMPL_HARD_REG_SET (OBJECT_TOTAL_CONFLICT_HARD_REGS (obj), + reg_class_contents[ALLOCNO_CLASS (to)]); + } + + /* ?!? This is a hack. + If the original allocno conflicts will all hard registers, then it must + have crossed a setjmp call or something similar. Just copy the conflicts + in this one case. + + Long term we'll catch this elsewhere as the new allocno may have not + have the same constraints on allocation that the original allocno had. */ + for (i = 0; i < n; i++) + { + HARD_REG_SET x; + ira_object_t tobj = ALLOCNO_OBJECT (to, i); + ira_object_t sobj = ALLOCNO_OBJECT (from, i); + + COMPL_HARD_REG_SET (x, OBJECT_CONFLICT_HARD_REGS (sobj)); + if (hard_reg_set_empty_p (x)) + { + COPY_HARD_REG_SET (OBJECT_CONFLICT_HARD_REGS (tobj), + OBJECT_CONFLICT_HARD_REGS (sobj)); + COPY_HARD_REG_SET (OBJECT_TOTAL_CONFLICT_HARD_REGS (tobj), + OBJECT_TOTAL_CONFLICT_HARD_REGS (sobj)); + } + + /* Count the number of conflicts on the original allocno. We use that + count as an estimate for the number of conflicts in the new allocno. + The new allocno should have fewer conflicts than the original as the + new allocno is only live in BB and thus only conflicts with objects + live in BB. */ + conflicts = 0; + FOR_EACH_OBJECT_CONFLICT (sobj, o, oci) + conflicts++; + + OBJECT_MIN (tobj) = OBJECT_MIN (sobj); + OBJECT_MAX (tobj) = OBJECT_MAX (sobj); + ira_allocate_object_conflicts (tobj, conflicts); + + /* For now we copy the live range from the original allocno to the new + allocno. This is very suboptimal. Consider if we have some allocno A + which gets split into A0..A100. A0..A100 will not be able to share + stack slots because the live ranges conflict (they were copied from A + verbatim) -- however, in reality each new allocno A0..A100 has a + distinct, non-conflicting live range. */ + for (prev = NULL, r = OBJECT_LIVE_RANGES (sobj); + r != NULL; + r = r->next, prev = range) + { + range = ira_create_live_range (tobj, r->start, r->finish, NULL); + /* ?!? This may not be necessary. 
*/ + range->start_next = NULL; + range->finish_next = NULL; + if (prev) + prev->next = range; + else + OBJECT_LIVE_RANGES (tobj) = range; + } + } + +} + +static bitmap live; + +static void +maybe_add_conflict (int reg1, int reg2, int limit) +{ + ira_allocno_t a1, a2; + + if (reg1 < FIRST_PSEUDO_REGISTER + && reg2 < FIRST_PSEUDO_REGISTER) + return; + + /* If neither register was created by localization, then ignore this + conflict. */ + if (reg1 < limit && reg2 < limit) + return; + + if (reg1 < FIRST_PSEUDO_REGISTER) + { + ira_allocno_t a = ira_regno_allocno_map[reg2]; + int i, n; + + n = ALLOCNO_NUM_OBJECTS (a); + for (i = 0; i < n; i++) + { + SET_HARD_REG_BIT (OBJECT_TOTAL_CONFLICT_HARD_REGS (ALLOCNO_OBJECT (a, i)), reg1); + SET_HARD_REG_BIT (OBJECT_CONFLICT_HARD_REGS (ALLOCNO_OBJECT (a, i)), reg1); + } + return; + } + + if (reg2 < FIRST_PSEUDO_REGISTER) + { + ira_allocno_t a = ira_regno_allocno_map[reg1]; + int i, n; + + n = ALLOCNO_NUM_OBJECTS (a); + for (i = 0; i < n; i++) + { + SET_HARD_REG_BIT (OBJECT_TOTAL_CONFLICT_HARD_REGS (ALLOCNO_OBJECT (a, i)), reg2); + SET_HARD_REG_BIT (OBJECT_CONFLICT_HARD_REGS (ALLOCNO_OBJECT (a, i)), reg2); + } + return; + } + + /* If the registers are in different classes, then ignore this conflict. */ + a1 = ira_regno_allocno_map[reg1]; + a2 = ira_regno_allocno_map[reg2]; + if (!ira_reg_classes_intersect_p[ALLOCNO_CLASS (a1)][ALLOCNO_CLASS (a2)]) + return; + + ira_add_conflict (ALLOCNO_OBJECT (a1, 0), ALLOCNO_OBJECT (a2, 0)); + if (ALLOCNO_NUM_OBJECTS (a2) == 2) + ira_add_conflict (ALLOCNO_OBJECT (a1, 0), ALLOCNO_OBJECT (a2, 1)); + if (ALLOCNO_NUM_OBJECTS (a1) == 2) + ira_add_conflict (ALLOCNO_OBJECT (a1, 1), ALLOCNO_OBJECT (a2, 0)); +} + +static void +mark_conflicts (rtx reg, unsigned int limit) +{ + bitmap_iterator bi; + unsigned int i; + unsigned int j, nregs; + unsigned int non_killing_store = 0; + + if (GET_CODE (reg) == SUBREG + && GET_CODE (SUBREG_REG (reg)) == REG) + { + if (GET_MODE_SIZE (GET_MODE (reg)) + < GET_MODE_SIZE (GET_MODE (SUBREG_REG (reg)))) + non_killing_store = 1; + + reg = SUBREG_REG (reg); + } + + if (GET_CODE (reg) != REG) + return; + + if (REGNO (reg) >= FIRST_PSEUDO_REGISTER) + nregs = 1; + else + nregs = HARD_REGNO_NREGS (REGNO (reg), GET_MODE (reg)); + + for (j = 0; j < nregs; j++) + { + if (!non_killing_store) + { + bitmap_clear_bit (live, REGNO (reg) + j); + } + EXECUTE_IF_SET_IN_BITMAP (live, 0, i, bi) + maybe_add_conflict (i, REGNO (reg) + j, limit); + } +} + +static void +mark_live (rtx reg) +{ + int i, nregs; + + if (GET_CODE (reg) == SUBREG) + reg = SUBREG_REG (reg); + if (REGNO (reg) > FIRST_PSEUDO_REGISTER) + nregs = 1; + else + nregs = HARD_REGNO_NREGS (REGNO (reg), GET_MODE (reg)); + + for (i = 0; i < nregs; i++) + bitmap_set_bit (live, REGNO (reg)); +} + +static void +fix_allocno_map ( ira_loop_tree_node_t loop_tree_node) +{ + /* Free the old (and inaccurate) map. */ + free (loop_tree_node->regno_allocno_map); + + /* Now install the correct map. */ + loop_tree_node->regno_allocno_map = ira_regno_allocno_map; +} + +static void +build_conflicts_for_new_allocnos (rtx head, rtx tail, + bitmap pseudos_to_localize, + int orig_max_reg_num) +{ + rtx insn; + basic_block bb = NULL; + + + live = BITMAP_ALLOC (NULL); + + for (insn = tail; insn != PREV_INSN (head); insn = PREV_INSN (insn)) + { + df_ref *def_rec, *use_rec; + int call_p; + + if (!NONDEBUG_INSN_P (insn)) + continue; + + /* Anytime we start processing a block we have to merge in the + registers live at the end of that block - pseudos_to_localize. 
*/ + if (bb != BLOCK_FOR_INSN (insn)) + { + hard_reg_set_iterator hrsi; + unsigned int i; + + bb = BLOCK_FOR_INSN (insn); + bitmap_ior_and_compl_into (live, DF_LIVE_OUT (bb), pseudos_to_localize); + EXECUTE_IF_SET_IN_HARD_REG_SET (eliminable_regset, 0, i, hrsi) + bitmap_clear_bit (live, i); + EXECUTE_IF_SET_IN_HARD_REG_SET (ira_no_alloc_regs, 0, i, hrsi) + bitmap_clear_bit (live, i); + } + + call_p = CALL_P (insn); + + if (call_p) + { + bitmap_iterator bi; + unsigned int i; + int freq; + + freq = REG_FREQ_FROM_BB (BLOCK_FOR_INSN (insn)); + if (freq == 0) + freq = 1; + EXECUTE_IF_SET_IN_BITMAP (live, orig_max_reg_num, i, bi) + { + ira_allocno_t a = ira_regno_allocno_map[i]; + unsigned int n = ALLOCNO_NUM_OBJECTS (a); + unsigned int j; + + ALLOCNO_CALLS_CROSSED_NUM (a)++; + ALLOCNO_CALL_FREQ (a) += freq; + + /* We want to avoid caller-saves for the new pseudos as + the new pseudos are already backed by a memory location. + + We could split these pseudos at call boundaries and remove + this hack. That would probably get us the best of both + worlds in most cases. */ + for (j = 0; j < n; j++) + { + ira_object_t obj = ALLOCNO_OBJECT (a, j); + + IOR_HARD_REG_SET (OBJECT_TOTAL_CONFLICT_HARD_REGS (obj), + call_used_reg_set); + IOR_HARD_REG_SET (OBJECT_CONFLICT_HARD_REGS (obj), + call_used_reg_set); + } + } + } + + /* Mark conflicts for any values defined in this insn. + Ideally we'd like to ignore conflicts resulting from simple + copies. Unfortunately that confuses reload because we can have + two pseudos assigned the same hard reg with overlapping lifetimes. + If the insn where one pseudo dies needs an output reload, then + reload (via combine_reloads) may select the dying pseudo's hard reg + to hold the output reload, which clobbers the value in the hard + reg which is still live. */ + for (def_rec = DF_INSN_DEFS (insn); *def_rec; def_rec++) + if (!call_p || !DF_REF_FLAGS_IS_SET (*def_rec, DF_REF_MAY_CLOBBER)) + mark_conflicts (DF_REF_REG (*def_rec), orig_max_reg_num); + + /* Mark each used value as live. */ + for (use_rec = DF_INSN_USES (insn); *use_rec; use_rec++) + mark_live (DF_REF_REG (*use_rec)); + } + + BITMAP_FREE (live); +} + +/* REF is a use or a set. For whatever registers REF refers to set the + appropriate bit in MAP. */ + +static void +record_a_use_or_set (bitmap map, rtx ref) +{ + if (GET_CODE (ref) == SUBREG) + ref = SUBREG_REG (ref); + + if (GET_CODE (ref) != REG) + return; + + bitmap_set_bit (map, REGNO (ref)); +} + +/* Emit trivial spill code for unallocated pseudos which are live at one or + more basic block boundaries appearing in BB. + + Effectively we're splitting the range of these pseudos in such a way as + the new pseudos are live within a single basic block by adding a store + after the last assignment to the pseudo and a load before the first use + within BB. + + ?!? We might be able to use extended basic blocks to avoid additional loads + and stores. Definitely worth some experimentation. */ + +static void +localize_pseudos (basic_block bb, bitmap pseudos_to_localize, bitmap visited) +{ + int orig_max_reg_num = max_reg_num (); + int i; + rtx insn, head, tail; + bitmap subreg = BITMAP_ALLOC (NULL); + bitmap full = BITMAP_ALLOC (NULL); + + /* Get the head and tail insns of this region. */ + head = BB_HEAD (bb); + for (;;) + { + edge e; + edge_iterator ei; + tail = BB_END (bb); + if (bb->next_bb == EXIT_BLOCK_PTR + || LABEL_P (BB_HEAD (bb->next_bb))) + break; + FOR_EACH_EDGE (e, ei, bb->succs) + if ((e->flags & EDGE_FALLTHRU) != 0) + break; + if (! 
e) + break; + bb = bb->next_bb; + } + + /* We can decompose some additional double-word SUBREGs here since we're + looking at a smaller window of insns than lower-subreg. However, this + method is also simpler, so in some ways its not as aggresive as + lower-subreg. + + For each pseudo, we want to know if the pseudo was used in its full + mode and if it was used in a partial mode via a subreg. */ + for (insn = tail; insn != PREV_INSN (head); insn = PREV_INSN (insn)) + { + df_ref *def_rec, *use_rec; + + /* We want to ignore naked CLOBBERs since they generate no code and + would impede decomposing double-word subregs. */ + if (!NONDEBUG_INSN_P (insn) || GET_CODE (PATTERN (insn)) == CLOBBER) + continue; + + /* For each def, see if it is a partial subreg store and if the size + of the outer mode is half the size of the inner mode and the size + of the outer mode is the same as a word. Note that for stores we + see the SUBREG itself which makes this easy. */ + for (def_rec = DF_INSN_DEFS (insn); *def_rec; def_rec++) + if ((DF_REF_FLAGS (*def_rec) & (DF_REF_SUBREG | DF_REF_PARTIAL)) + == (DF_REF_SUBREG | DF_REF_PARTIAL) + && (GET_MODE_BITSIZE (GET_MODE (DF_REF_REG (*def_rec))) * 2 + == GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (DF_REF_REG (*def_rec))))) + && (GET_MODE_BITSIZE (GET_MODE (DF_REF_REG (*def_rec))) + == GET_MODE_BITSIZE (word_mode))) + record_a_use_or_set (subreg, DF_REF_REG (*def_rec)); + else + record_a_use_or_set (full, DF_REF_REG (*def_rec)); + + + /* Similarly for each use, except the use might be implied by a + write to a SUBREG. In that case we will not see the SUBREG + expression, but instead will see a full word read marked with + DF_REF_READ_WRITE. We want to consider those SUBREG reads + as they'll disappear if we decompose the SUBREG. */ + for (use_rec = DF_INSN_USES (insn); *use_rec; use_rec++) + if ((DF_REF_FLAGS (*use_rec) & (DF_REF_SUBREG | DF_REF_PARTIAL)) + == (DF_REF_SUBREG | DF_REF_PARTIAL) + && (GET_MODE_BITSIZE (GET_MODE (DF_REF_REG (*use_rec))) * 2 + == GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (DF_REF_REG (*use_rec))))) + && (GET_MODE_BITSIZE (GET_MODE (DF_REF_REG (*use_rec))))) + record_a_use_or_set (subreg, DF_REF_REG (*use_rec)); + else if ((DF_REF_FLAGS (*use_rec) & (DF_REF_SUBREG | DF_REF_READ_WRITE)) + == (DF_REF_SUBREG | DF_REF_READ_WRITE) + && (GET_MODE_BITSIZE (GET_MODE (DF_REF_REG (*use_rec))) + == 2 * GET_MODE_BITSIZE (word_mode))) + record_a_use_or_set (subreg, DF_REF_REG (*use_rec)); + else + record_a_use_or_set (full, DF_REF_REG (*use_rec)); + } + + /* Now eliminate any pseudos that were used in their full width + for the candidates to decompose. */ + bitmap_and_compl_into (subreg, full); + + /* Eliminate regs not marked for localization from the + candidates to decompose. 
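The mode-size test applied to both the def and use scans above reduces to a
simple predicate.  Here is a standalone model of it (an editor's sketch, not
GCC code; the bit sizes stand in for GET_MODE_BITSIZE of the outer SUBREG
mode, the inner SUBREG_REG mode, and word_mode respectively):

  #include <stdbool.h>

  // A word-sized access to exactly half of a double-word pseudo qualifies,
  // e.g. SImode (32 bits) inside DImode (64 bits) on a 32-bit target.
  static bool
  decomposable_subreg_p (unsigned outer_bits, unsigned inner_bits,
                         unsigned word_bits)
  {
    return outer_bits * 2 == inner_bits && outer_bits == word_bits;
  }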
*/ + bitmap_and_into (subreg, pseudos_to_localize); + + regs_to_store = BITMAP_ALLOC (NULL); + regs_to_store_alt = BITMAP_ALLOC (NULL); + regs_to_load = BITMAP_ALLOC (NULL); + regs_to_load_alt = BITMAP_ALLOC (NULL); + pseudo_nuses = (int *) xmalloc (max_reg_num () * sizeof (int)); + memset (pseudo_nuses, 0, max_reg_num () * sizeof (int)); + pseudo_nsets = (int *) xmalloc (max_reg_num () * sizeof (int)); + memset (pseudo_nsets, 0, max_reg_num () * sizeof (int)); + + reg_map = (rtx *) xmalloc (sizeof (rtx) * orig_max_reg_num); + memset (reg_map, 0, sizeof (rtx) * orig_max_reg_num); + alt_reg_map = (rtx *) xmalloc (sizeof (rtx) * orig_max_reg_num); + memset (alt_reg_map, 0, sizeof (rtx) * orig_max_reg_num); + + bitmap_copy (regs_to_store, pseudos_to_localize); + bitmap_copy (regs_to_store_alt, pseudos_to_localize); + + /* First walk over the insns in this region and identify singleton + uses of registers in PSEUDOS_TO_LOCALIZE. We want to know if a + use is a singleton so that we can change the use to a MEM. We + need this information prior to emitting localizing stores so that + we can change both the use and set in a single insn to a MEM. */ + for (insn = tail; insn != PREV_INSN (head); insn = PREV_INSN (insn)) + { + df_ref *def_rec, *use_rec; + + if (!NONDEBUG_INSN_P (insn)) + continue; + + bitmap_set_bit (visited, BLOCK_FOR_INSN (insn)->index); + + /* If we have traversed into a new basic block, then reset + NO_USES_AFTER_LAST_SET for any pseudo we want to localize + that is live-out on the edge(s) that we did NOT traverse. */ + if (bb != BLOCK_FOR_INSN (insn)) + { + edge e; + edge_iterator ei; + + bb = BLOCK_FOR_INSN (insn); + FOR_EACH_EDGE (e, ei, bb->succs) + { + bitmap_ior_and_into (regs_to_store, + pseudos_to_localize, + DF_LIVE_IN (e->dest)); + bitmap_ior_and_into (regs_to_store_alt, + pseudos_to_localize, + DF_LIVE_IN (e->dest)); + } + } + + /* We don't want CLOBBERS to be counted as they generate no code. */ + if (GET_CODE (PATTERN (insn)) == CLOBBER) + continue; + + for (def_rec = DF_INSN_DEFS (insn); *def_rec; def_rec++) + identify_singleton_sets (DF_REF_REG (*def_rec)); + + for (use_rec = DF_INSN_USES (insn); *use_rec; use_rec++) + identify_singleton_uses (DF_REF_REG (*use_rec), + subreg, + DF_REF_FLAGS (*use_rec)); + } + + /* Next emit a store after the last assignment of each pseudo in + PSEUDOS_TO_LOCALIZE within the region. Collect list of pseudos + we'll need to load as well. */ + for (bb = BLOCK_FOR_INSN (tail), insn = tail; + insn != PREV_INSN (BB_HEAD (BLOCK_FOR_INSN (head))); + insn = PREV_INSN (insn)) + { + df_ref *def_rec, *use_rec; + int status; + + if (!NONDEBUG_INSN_P (insn)) + continue; + + /* If we have traversed into a new basic block, then reset REGS_TO_STORE + for any pseudo we want to localize that is live-out on the edge(s) + that we did NOT traverse. */ + if (bb != BLOCK_FOR_INSN (insn)) + { + edge e; + edge_iterator ei; + + bb = BLOCK_FOR_INSN (insn); + FOR_EACH_EDGE (e, ei, bb->succs) + { + if ((e->flags & EDGE_FALLTHRU) == 0) + { + bitmap_ior_and_into (regs_to_store, + pseudos_to_localize, + DF_LIVE_IN (e->dest)); + bitmap_ior_and_into (regs_to_store_alt, + pseudos_to_localize, + DF_LIVE_IN (e->dest)); + } + } + } + + status = 0; + for (def_rec = DF_INSN_DEFS (insn); *def_rec; def_rec++) + status |= emit_localizing_stores (DF_REF_REG (*def_rec), + insn, tail, subreg); + + /* A return status of -1 indicates INSN should be removed, including + naked CLOBBERS. Do not delete other assignments that are not + simple SET insns. 
*/ + if (status == -1 + && (GET_CODE (PATTERN (insn)) == CLOBBER + || single_set (insn))) + { + set_insn_deleted (insn); + continue; + } + + /* It is not safe to defer scanning any further as emit_localizing_stores + can change uses and defs. */ + if (status) + df_insn_rescan (insn); + + for (use_rec = DF_INSN_USES (insn); *use_rec; use_rec++) + collect_loads (DF_REF_REG (*use_rec), subreg, DF_REF_FLAGS (*use_rec)); + } + + /* Now walk forward through the region emitting loads before + the first use of each pseudo that we're localizing and change + each reference from an unallocated pseudo to a new block local + spill register. */ + for (insn = head; + insn != NEXT_INSN (BB_END (BLOCK_FOR_INSN (tail))); + insn = NEXT_INSN (insn)) + { + df_ref *def_rec, *use_rec; + int need_rescan; + + if (!NONDEBUG_INSN_P (insn)) + continue; + + need_rescan = 0; + for (use_rec = DF_INSN_USES (insn); *use_rec; use_rec++) + need_rescan |= emit_localizing_loads (DF_REF_REG (*use_rec), + insn, + subreg, + DF_REF_FLAGS (*use_rec)); + + for (def_rec = DF_INSN_DEFS (insn); *def_rec; def_rec++) + need_rescan |= rename_sets (DF_REF_REG (*def_rec), + insn, + subreg, + DF_REF_FLAGS (*def_rec)); + + if (need_rescan) + df_insn_rescan (insn); + + } + + /* If we allocated new pseudos, then we need to expand various arrays and + update IRA's data structures. */ + if (orig_max_reg_num != max_reg_num ()) + { + unsigned int max_regno = max_reg_num (); + unsigned int nregs = max_regno - orig_max_reg_num; + + /* First expand various data structures. */ + regstat_reallocate_ri (max_regno); + expand_reg_info (max_regno - 1); + grow_reg_equivs (); + regstat_n_sets_and_refs + = ((struct regstat_n_sets_and_refs_t *) + xrealloc (regstat_n_sets_and_refs, + (max_regno * sizeof (struct regstat_n_sets_and_refs_t)))); + memset (®stat_n_sets_and_refs[orig_max_reg_num], 0, + nregs * sizeof (struct regstat_n_sets_and_refs_t)); + reg_max_ref_width = (unsigned int *) xrealloc (reg_max_ref_width, + max_regno * sizeof (unsigned int)); + memset (®_max_ref_width[orig_max_reg_num], 0, + nregs * sizeof (unsigned int)); + + /* Now copy data from the original register to the new register. */ + for (i = 0; i < orig_max_reg_num; i++) + { + int nregno; + + if (reg_map[i] != 0) + { + nregno = REGNO (reg_map[i]); + setup_reg_classes (nregno, + reg_preferred_class (i), + reg_alternate_class (i), + reg_preferred_class (i)); + VEC_index (reg_equivs_t, reg_equivs, nregno)->invariant + = VEC_index (reg_equivs_t, reg_equivs, i)->invariant; + VEC_index (reg_equivs_t, reg_equivs, nregno)->constant + = VEC_index (reg_equivs_t, reg_equivs, i)->constant; + VEC_index (reg_equivs_t, reg_equivs, nregno)->mem + = VEC_index (reg_equivs_t, reg_equivs, i)->mem; + VEC_index (reg_equivs_t, reg_equivs, nregno)->alt_mem_list + = VEC_index (reg_equivs_t, reg_equivs, i)->alt_mem_list; + VEC_index (reg_equivs_t, reg_equivs, nregno)->address + = VEC_index (reg_equivs_t, reg_equivs, i)->address; + VEC_index (reg_equivs_t, reg_equivs, nregno)->memory_loc + = VEC_index (reg_equivs_t, reg_equivs, i)->memory_loc; + /* ?!? I don't recall why this was originally necessary. + Definitely need to retest and understand or delete it. 
*/ + VEC_index (reg_equivs_t, reg_equivs, i)->init = NULL; +#if 0 + VEC_index (reg_equivs_t, reg_equivs, nregno)->init + = VEC_index (reg_equivs_t, reg_equivs, i)->init; +#endif + reg_max_ref_width[nregno] = reg_max_ref_width[i]; + reg_renumber[nregno] = reg_renumber[i]; + REG_N_CALLS_CROSSED (nregno) = REG_N_CALLS_CROSSED (i); + REG_FREQ (nregno) = ((pseudo_nuses[i] + pseudo_nsets[i]) + * REG_FREQ_FROM_BB (bb)); + + /* We don't really care other than to be sure there's a set + and ref. */ + SET_REG_N_SETS (nregno, 1); + SET_REG_N_REFS (nregno, 1); + + /* The new register is always local to this block. */ + REG_BASIC_BLOCK (nregno) = REG_BLOCK_GLOBAL; + + /* Create a new allocno for the new register. */ + copy_allocno_for_spilling (nregno, i); + } + + if (alt_reg_map[i] != 0) + { + nregno = REGNO (alt_reg_map[i]); + setup_reg_classes (nregno, + reg_preferred_class (i), + reg_alternate_class (i), + reg_preferred_class (i)); + VEC_index (reg_equivs_t, reg_equivs, nregno)->invariant + = VEC_index (reg_equivs_t, reg_equivs, i)->invariant; + VEC_index (reg_equivs_t, reg_equivs, nregno)->constant + = VEC_index (reg_equivs_t, reg_equivs, i)->constant; + VEC_index (reg_equivs_t, reg_equivs, nregno)->mem + = VEC_index (reg_equivs_t, reg_equivs, i)->mem; + VEC_index (reg_equivs_t, reg_equivs, nregno)->alt_mem_list + = VEC_index (reg_equivs_t, reg_equivs, i)->alt_mem_list; + VEC_index (reg_equivs_t, reg_equivs, nregno)->address + = VEC_index (reg_equivs_t, reg_equivs, i)->address; + VEC_index (reg_equivs_t, reg_equivs, nregno)->memory_loc + = VEC_index (reg_equivs_t, reg_equivs, i)->memory_loc; + /* ?!? I don't recall why this was originally necessary. + Definitely need to retest and understand or delete it. */ + VEC_index (reg_equivs_t, reg_equivs, i)->init = NULL; +#if 0 + VEC_index (reg_equivs_t, reg_equivs, nregno)->init + = VEC_index (reg_equivs_t, reg_equivs, i)->init; +#endif + reg_max_ref_width[nregno] = reg_max_ref_width[i]; + reg_renumber[nregno] = reg_renumber[i]; + REG_N_CALLS_CROSSED (nregno) = REG_N_CALLS_CROSSED (i); + REG_FREQ (nregno) = ((pseudo_nuses[i] + pseudo_nsets[i]) + * REG_FREQ_FROM_BB (bb)); + + /* We don't really care other than to be sure there's a set + and ref. */ + SET_REG_N_SETS (nregno, 1); + SET_REG_N_REFS (nregno, 1); + + /* The new register is always local to this block. */ + REG_BASIC_BLOCK (nregno) = REG_BLOCK_GLOBAL; + + /* Create a new allocno for the new register. */ + copy_allocno_for_spilling (nregno, i); + } + } + + /* Now look for any pseudos >= orig_max_reg_num which do not have + an associated allocno. These must have been created as temporaries + by emit_move_insn. We'll need allocnos for them. */ + for (i = orig_max_reg_num; i < (int)max_regno; i++) + if (ira_regno_allocno_map[i] == NULL) + abort (); + + + /* Now reallocate a few IRA arrays. */ + ira_finish_assign (); + ira_initiate_assign (); + + /* Fill in the sorted_allocnos and priority arrays. */ + { + ira_allocno_t a; + ira_allocno_iterator ai; + unsigned int num = 0; + extern ira_allocno_t *sorted_allocnos; + + FOR_EACH_ALLOCNO (a, ai) + sorted_allocnos[num++] = a; + setup_allocno_priorities (sorted_allocnos, num); + } + + /* We didn't copy the conflicts from the old allocno to the new allocno + as typically the new allocno will have fewer conflicts. + + We wait until after we've created all the new allocnos for this block + so that we can update the conflict graph with a single backwards walk + through this block. 
*/ + build_conflicts_for_new_allocnos (head, + tail, + pseudos_to_localize, + orig_max_reg_num); + + /* We added new live-range objects, so rebuild the chains. */ + ira_rebuild_start_finish_chains (); + } + + free (reg_map); + reg_map = NULL; + free (alt_reg_map); + alt_reg_map = NULL; + BITMAP_FREE (regs_to_store); + regs_to_store = NULL; + BITMAP_FREE (regs_to_store_alt); + regs_to_store_alt = NULL; + BITMAP_FREE (regs_to_load); + regs_to_load = NULL; + BITMAP_FREE (regs_to_load_alt); + regs_to_load_alt = NULL; + free (pseudo_nuses); + pseudo_nuses = NULL; + free (pseudo_nsets); + pseudo_nsets = NULL; + BITMAP_FREE (subreg); + subreg = NULL; + BITMAP_FREE (full); + full = NULL; +} + +void +ira_reload (void) +{ + /* We need to build the various equivalences prior to assigning stack + slots for unallocated global pseudos. */ + record_equivalences_for_reload (); + + /* What to do when this isn't true? */ + if (ira_conflicts_p) + { + unsigned int i, j; + bitmap_iterator bi; + basic_block bb; + bitmap visited; + int orig_max_reg_num = max_reg_num (); + + pseudos_to_localize = BITMAP_ALLOC (NULL); + max_regno = max_reg_num (); + visited = BITMAP_ALLOC (NULL); + + /* Collect all the registers we want to localize into a bitmap. + We don't want to localize pseudos which are contained wholly + within an EBB, so we look for pseudos which are live at the + start of the EBB or at the end of the EBB. */ + FOR_EACH_BB (bb) + { + if (!bitmap_bit_p (visited, bb->index)) + { + /* This collects pseudos live at the start of the EBB. */ + EXECUTE_IF_SET_IN_BITMAP (DF_LIVE_IN (bb), FIRST_PSEUDO_REGISTER, i, bi) + if (localize_pseudo_p (i)) + bitmap_set_bit (pseudos_to_localize, i); + + for (;;) + { + edge e; + edge_iterator ei; + bitmap_set_bit (visited, bb->index); + if (bb->next_bb == EXIT_BLOCK_PTR + || LABEL_P (BB_HEAD (bb->next_bb))) + break; + FOR_EACH_EDGE (e, ei, bb->succs) + if ((e->flags & EDGE_FALLTHRU) != 0) + break; + if (! e) + break; + bb = bb->next_bb; + } + + /* This collects pseudos live at the end of the EBB. */ + EXECUTE_IF_SET_IN_BITMAP (DF_LIVE_OUT (bb), FIRST_PSEUDO_REGISTER, i, bi) + if (localize_pseudo_p (i)) + bitmap_set_bit (pseudos_to_localize, i); + } + } + + /* If an assignment to a pseudo has a REG_EQUIV note attached to it for + a non-constant memory address, that memory location can generally be + considered the "home" for the pseudo. + + That works great, except when the memory location contains a reference + to a pseudo which we are going to localize. If we use the equivalence + we will introduce an uninitialized use of the pseudo we're localizing. + + We just remove the equivalence for now; we could do better since we + know these regs will be local to a block and thus we can derive the + split pseudo's current name and update the note. 
*/ + for (j = FIRST_PSEUDO_REGISTER; j < (unsigned) max_regno; j++) + { + rtx memloc = VEC_index (reg_equivs_t, reg_equivs, j)->memory_loc; + + if (!memloc) + continue; + + EXECUTE_IF_SET_IN_BITMAP (pseudos_to_localize, FIRST_PSEUDO_REGISTER, + i, bi) + { + if (reg_mentioned_p (regno_reg_rtx[i], memloc)) + { + rtx list; + VEC_index (reg_equivs_t, reg_equivs, j)->memory_loc + = NULL_RTX; + for (list = VEC_index (reg_equivs_t, reg_equivs, j)->init; + list; + list = XEXP (list, 1)) + { + rtx equiv_insn = XEXP (list, 0); + rtx set = single_set (equiv_insn); + if (set + && REG_P (SET_DEST (set)) + && SET_DEST (set) == regno_reg_rtx[j]) + { + rtx note = find_reg_note (equiv_insn, REG_EQUIV, NULL_RTX); + remove_note (equiv_insn, note); + } + + } + VEC_index (reg_equivs_t, reg_equivs, j)->init = NULL_RTX; + + break; + } + } + } + + + /* Assign stack slots for pseudos live at block boundaries which did not + get hard regs. This unfortunately turns pseudos into hard regs which + we will need to undo later. */ + assign_stack_slots (); + + for (i = FIRST_PSEUDO_REGISTER; i < (unsigned) max_regno; i++) + if (regno_reg_rtx[i]) + SET_REGNO (regno_reg_rtx[i], i); + + if (!bitmap_empty_p (pseudos_to_localize)) + { + bitmap_clear (visited); + FOR_EACH_BB (bb) + if (!bitmap_bit_p (visited, bb->index)) + localize_pseudos (bb, pseudos_to_localize, visited); + } + + /* Recompute DF info, primarily to get accurate death notes which + IRA can utilize to give better allocations. + + We could do this by hand and only in blocks of interest and + probably will in the future. But for now, go with the + heavyweight, but clearly simple solution. */ + df_scan_alloc (NULL); + df_scan_blocks (); + df_analyze (); + + /* We want to be able to call back into routines to compute costs and + record copies via ira_traverse_loop_tree. Those routines typically + require the regno->allocno map within the loop tree structures to + be accurate. So we first traverse the loop tree to free the old + map and set up the correct map. */ + ira_traverse_loop_tree (true, ira_loop_tree_root, + fix_allocno_map, NULL); + + /* We may have replaced the source or destination of a "copy" with a new + pseudo. Make sure the new "copies" get recorded. This includes + register shuffles to satisfy constraints. */ + ira_traverse_loop_tree (true, ira_loop_tree_root, ira_add_copies, NULL); + + /* Get costing information for any newly created pseudos. */ + ira_costs (orig_max_reg_num); + ira_tune_allocno_costs (); + + /* We may have allocated additional pseudos during spilling, so update + max_regno. ?!? Updating max_regno should really occur when we + allocate new regs. Or better yet, max it go away completely. */ + max_regno = max_reg_num (); + + /* Now we want to remove each allocnos associated with the pseudos we + localized from the conflicts of every other allocno. Do this once + after localizing in all blocks rather than in each block. */ + EXECUTE_IF_SET_IN_BITMAP (pseudos_to_localize, + FIRST_PSEUDO_REGISTER, i, bi) + { + ira_allocno_t a = ira_regno_allocno_map[i]; + int n = ALLOCNO_NUM_OBJECTS (ira_regno_allocno_map[i]); + int j; + + for (j = 0; j < n; j++) + remove_from_all_conflicts (ALLOCNO_OBJECT (a, j)); + SET_REG_N_REFS (i, 0); + SET_REG_N_SETS (i, 0); + } + + /* Try to assign hard regs to pseudos that didn't get them the + first time through the allocator. 
*/ + { + unsigned int i, n; + int *x = (int *) xmalloc (max_regno * sizeof (int)); + HARD_REG_SET bad_spill_regs ; + HARD_REG_SET *p = XNEWVEC (HARD_REG_SET, max_regno); + regset_head z; + + + if (internal_flag_ira_verbose > 0 && ira_dump_file != NULL) + fprintf (ira_dump_file, "Reassigning after localization\n"); + + INIT_REG_SET (&z); + CLEAR_REG_SET (&z); + COPY_HARD_REG_SET (bad_spill_regs, fixed_reg_set); + memset (x, 0, max_regno * sizeof (int)); + for (n = 0, i = LAST_VIRTUAL_REGISTER + 1; i < (unsigned) max_regno; i++) + { + CLEAR_HARD_REG_SET (p[i]); + if (reg_renumber[i] == -1 && ira_regno_allocno_map[i] && !bitmap_bit_p (pseudos_to_localize, i)) + x[n++] = i; + } + + ira_reassign_pseudos (x, n, bad_spill_regs, p, p, &z, false); + + if (internal_flag_ira_verbose > 0 && ira_dump_file != NULL) + fprintf (ira_dump_file, "Done reassigning after localization\n"); + } + + BITMAP_FREE (pseudos_to_localize); + + + /* Spill code insertion can force creation of new basic blocks. */ + if (fixup_abnormal_edges ()) + commit_edge_insertions (); + + for (i = FIRST_PSEUDO_REGISTER; i < (unsigned) max_regno; i++) + if (regno_reg_rtx[i]) + SET_REGNO (regno_reg_rtx[i], i); + + /* Finally, reset the DF analyzer as we have added new blocks, new + insns, modified existing insns, etc etc. */ + df_scan_alloc (NULL); + df_scan_blocks (); + df_analyze (); + } +} diff --git a/gcc/ira.c b/gcc/ira.c index e3d3fe30385..e335a701175 100644 --- a/gcc/ira.c +++ b/gcc/ira.c @@ -2240,7 +2240,7 @@ setup_preferred_alternate_classes_for_new_pseudos (int start) /* Regional allocation can create new pseudo-registers. This function expands some arrays for pseudo-registers. */ -static void +void expand_reg_info (int old_size) { int i; @@ -3686,6 +3686,7 @@ ira (FILE *f) if (delete_trivially_dead_insns (get_insns (), max_reg_num ())) df_analyze (); + max_regno = max_reg_num (); if (max_regno != max_regno_before_ira) { @@ -3719,6 +3720,9 @@ ira (FILE *f) timevar_pop (TV_IRA); timevar_push (TV_RELOAD); + + ira_reload (); + df_set_flags (DF_NO_INSN_RESCAN); build_insn_chain (); diff --git a/gcc/ira.h b/gcc/ira.h index 60518ecb313..ab6f8080a50 100644 --- a/gcc/ira.h +++ b/gcc/ira.h @@ -138,10 +138,9 @@ extern void ira_sort_regnos_for_alter_reg (int *, int, unsigned int *); extern void ira_mark_allocation_change (int); extern void ira_mark_memory_move_deletion (int, int); extern bool ira_reassign_pseudos (int *, int, HARD_REG_SET, HARD_REG_SET *, - HARD_REG_SET *, bitmap); + HARD_REG_SET *, bitmap, bool); extern rtx ira_reuse_stack_slot (int, unsigned int, unsigned int); extern void ira_mark_new_stack_slot (rtx, int, unsigned int); extern bool ira_better_spill_reload_regno_p (int *, int *, rtx, rtx, rtx); extern bool ira_bad_reload_regno (int, rtx, rtx); - extern void ira_adjust_equiv_reg_cost (unsigned, int); diff --git a/gcc/params.def b/gcc/params.def index 239b684b5fc..c27fd64e4f2 100644 --- a/gcc/params.def +++ b/gcc/params.def @@ -566,6 +566,14 @@ DEFPARAM(PARAM_MAX_RELOAD_SEARCH_INSNS, "The maximum number of instructions to search backward when looking for equivalent reload", 100, 0, 0) +/* Clamp the number of forward insn scans when prioritizing spill registers to + avoid potential compile-time slowdowns in pathological cases. 
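A minimal sketch of how the parameter defined just below would be consulted
(PARAM_VALUE is the usual accessor; the exact call site inside the forward
scan of allocate_reload_reg is an assumption, not something this hunk
shows):

  if (num > PARAM_VALUE (PARAM_MAX_RELOAD_FORWARD_SEARCH_INSNS))
    break;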
*/ + +DEFPARAM(PARAM_MAX_RELOAD_FORWARD_SEARCH_INSNS, + "max-reload-forward-search-insns", + "The maximum number of instructions to search forward prioritizing spill registers", + 20, 0, 0) + DEFPARAM(PARAM_SINK_FREQUENCY_THRESHOLD, "sink-frequency-threshold", "Target block's relative execution frequency (as a percentage) required to sink a statement", diff --git a/gcc/regs.h b/gcc/regs.h index 328b839ffac..8a48b4d3a38 100644 --- a/gcc/regs.h +++ b/gcc/regs.h @@ -99,6 +99,7 @@ extern void regstat_free_ri (void); extern bitmap regstat_get_setjmp_crosses (void); extern void regstat_compute_calls_crossed (void); extern void regstat_free_calls_crossed (void); +extern void regstat_reallocate_ri (unsigned int); /* Register information indexed by register number. This structure is diff --git a/gcc/regstat.c b/gcc/regstat.c index bfd743bffd4..7f1c33d6cb8 100644 --- a/gcc/regstat.c +++ b/gcc/regstat.c @@ -336,6 +336,16 @@ regstat_bb_compute_ri (unsigned int bb_index, bitmap_clear (local_live); } +/* Reallocate the register info structure, ensuring the new entries are + properly cleared. */ +void +regstat_reallocate_ri (unsigned int max_regno) +{ + reg_info_p = XRESIZEVEC (struct reg_info_t, reg_info_p, max_regno); + memset (®_info_p[reg_info_p_size], 0, + (max_regno - reg_info_p_size) * sizeof (struct reg_info_t)); + reg_info_p_size = max_regno; +} /* Compute register info: lifetime, bb, and number of defs and uses. */ void diff --git a/gcc/reload.h b/gcc/reload.h index ad0cfcd82d8..9f73072c4d9 100644 --- a/gcc/reload.h +++ b/gcc/reload.h @@ -204,6 +204,7 @@ extern struct target_reload *this_target_reload; #define caller_save_initialized_p \ (this_target_reload->x_caller_save_initialized_p) +extern unsigned int *reg_max_ref_width; /* Register equivalences. Indexed by register number. */ typedef struct reg_equivs { @@ -463,6 +464,13 @@ extern void debug_reload (void); reloading to/from a register that is wider than a word. */ extern rtx reload_adjust_reg_for_mode (rtx, enum machine_mode); +/* Modify the home of a pseudo register. */ +extern void alter_reg (int, int, bool); + +/* Record memory and constant equivalences for pseudos which did not get hard + registers. */ +extern void record_equivalences_for_reload (void); + /* Ideally this function would be in ira.c or reload, but due to dependencies on integrate.h, it's part of integrate.c. */ extern void allocate_initial_values (VEC (reg_equivs_t, gc) *); diff --git a/gcc/reload1.c b/gcc/reload1.c index 2f783a2940f..0bd415113b7 100644 --- a/gcc/reload1.c +++ b/gcc/reload1.c @@ -45,6 +45,7 @@ along with GCC; see the file COPYING3. If not see #include "except.h" #include "tree.h" #include "ira.h" +#include "params.h" #include "target.h" #include "emit-rtl.h" @@ -103,7 +104,7 @@ static regset_head reg_has_output_reload; static HARD_REG_SET reg_is_output_reload; /* Widest width in which each pseudo reg is referred to (via subreg). */ -static unsigned int *reg_max_ref_width; +unsigned int *reg_max_ref_width; /* Vector to remember old contents of reg_renumber before spilling. */ static short *reg_old_renumber; @@ -256,6 +257,13 @@ static bool need_dce; /* List of all insns needing reloads. */ static struct insn_chain *insns_need_reload; + +/* TRUE if we have initialized the DF datastructures at some point in this + instance of reload. FALSE otherwise. + + We use this to lazily update the DF information from within + allocate_reload_reg. */ +bool df_initialized; /* This structure is used to record information about register eliminations. 
Each array entry describes one possible way of eliminating a register @@ -361,7 +369,6 @@ static void delete_caller_save_insns (void); static void spill_failure (rtx, enum reg_class); static void count_spilled_pseudo (int, int, int); static void delete_dead_insn (rtx); -static void alter_reg (int, int, bool); static void set_label_offsets (rtx, rtx, int); static void check_eliminable_occurrences (rtx); static void elimination_effects (rtx, enum machine_mode); @@ -421,6 +428,10 @@ static bool gen_reload_chain_without_interm_reg_p (int, int); static int reloads_conflict (int, int); static rtx gen_reload (rtx, rtx, int, enum reload_type); static rtx emit_insn_if_valid_for_reload (rtx); + +/* Return whether or not X is a REG or (SUBREG (REG)). */ +#define REG_OR_SUBREG_P(X) \ + (REG_P (X) || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X)))) /* Initialize the reload pass. This is called at the beginning of compilation and may be called again if the target is reinitialized. */ @@ -705,12 +716,14 @@ static int *temp_pseudo_reg_arr; bool reload (rtx first, int global) { - int i, n; + int i; rtx insn; struct elim_table *ep; basic_block bb; bool inserted; + df_initialized = false; + /* Make sure even insns with volatile mem refs are recognizable. */ init_recog (); @@ -773,27 +786,20 @@ reload (rtx first, int global) memcpy (reg_old_renumber, reg_renumber, max_regno * sizeof (short)); pseudo_forbidden_regs = XNEWVEC (HARD_REG_SET, max_regno); pseudo_previous_regs = XCNEWVEC (HARD_REG_SET, max_regno); - CLEAR_HARD_REG_SET (bad_spill_regs_global); init_eliminable_invariants (first, true); init_elim_table (); - /* Alter each pseudo-reg rtx to contain its hard reg number. Assign - stack slots to the pseudos that lack hard regs or equivalents. - Do not touch virtual registers. */ - + /* Other hunks of code still use this array. Ugh. */ temp_pseudo_reg_arr = XNEWVEC (int, max_regno - LAST_VIRTUAL_REGISTER - 1); - for (n = 0, i = LAST_VIRTUAL_REGISTER + 1; i < max_regno; i++) - temp_pseudo_reg_arr[n++] = i; - if (ira_conflicts_p) - /* Ask IRA to order pseudo-registers for better stack slot - sharing. */ - ira_sort_regnos_for_alter_reg (temp_pseudo_reg_arr, n, reg_max_ref_width); + /* ira-reload may have created new pseudos which didn't get hard registers + or stack slots. Assign them stack slots now. Also alter each pseudo + to contain its hard reg number. */ - for (i = 0; i < n; i++) - alter_reg (temp_pseudo_reg_arr[i], -1, false); + for (i = LAST_VIRTUAL_REGISTER + 1; i < max_regno; i++) + alter_reg (i, -1, false); /* If we have some registers we think can be eliminated, scan all insns to see if there is an insn that sets one of these registers to something @@ -1338,6 +1344,128 @@ reload (rtx first, int global) return need_dce; } +/* Find all the pseudo registers that didn't get hard regs + but do have known equivalent constants or memory slots. + These include parameters (known equivalent to parameter slots) + and cse'd or loop-moved constant memory addresses. + + Record constant equivalents in reg_equiv_constant + so they will be substituted by find_reloads. + Record memory equivalents in reg_mem_equiv so they can + be substituted eventually by altering the REG-rtx's. */ + +void +record_equivalences_for_reload (void) +{ + rtx insn, first; + int i; + int max_regno = max_reg_num (); + + first = get_insns (); + grow_reg_equivs (); + reg_max_ref_width = XCNEWVEC (unsigned int, max_regno); + + /* Look for REG_EQUIV notes; record what each pseudo is equivalent + to. 
Also find all paradoxical subregs and find largest such for + each pseudo. */ + + num_eliminable_invariants = 0; + for (insn = first; insn; insn = NEXT_INSN (insn)) + { + rtx set = single_set (insn); + + /* We may introduce USEs that we want to remove at the end, so + we'll mark them with QImode. Make sure there are no + previously-marked insns left by say regmove. */ + if (INSN_P (insn) && GET_CODE (PATTERN (insn)) == USE + && GET_MODE (insn) != VOIDmode) + PUT_MODE (insn, VOIDmode); + + if (NONDEBUG_INSN_P (insn)) + scan_paradoxical_subregs (PATTERN (insn)); + + if (set != 0 && REG_P (SET_DEST (set))) + { + rtx note = find_reg_note (insn, REG_EQUIV, NULL_RTX); + rtx x; + + if (! note) + continue; + + i = REGNO (SET_DEST (set)); + x = XEXP (note, 0); + + if (i <= LAST_VIRTUAL_REGISTER) + continue; + + /* If flag_pic and we have a constant, verify its legitimate. */ + if (!CONSTANT_P (x) + || !flag_pic || LEGITIMATE_PIC_OPERAND_P (x)) + { + /* It can happen that a REG_EQUIV note contains a MEM + that is not a legitimate memory operand. As later + stages of reload assume that all addresses found + in the reg_equiv_* arrays were originally legitimate, + we ignore such REG_EQUIV notes. */ + if (memory_operand (x, VOIDmode)) + { + /* Always unshare the equivalence, so we can + substitute into this insn without touching the + equivalence. */ + reg_equiv_memory_loc (i) = copy_rtx (x); + } + else if (function_invariant_p (x)) + { + enum machine_mode mode; + + mode = GET_MODE (SET_DEST (set)); + if (GET_CODE (x) == PLUS) + { + /* This is PLUS of frame pointer and a constant, + and might be shared. Unshare it. */ + reg_equiv_invariant (i) = copy_rtx (x); + num_eliminable_invariants++; + } + else if (x == frame_pointer_rtx || x == arg_pointer_rtx) + { + reg_equiv_invariant (i) = x; + num_eliminable_invariants++; + } + else if (targetm.legitimate_constant_p (mode, x)) + reg_equiv_constant (i) = x; + else + { + + reg_equiv_memory_loc (i) + = force_const_mem (GET_MODE (SET_DEST (set)), x); + if (! reg_equiv_memory_loc (i)) + reg_equiv_init (i) = NULL_RTX; + } + } + else + { + reg_equiv_init (i) = NULL_RTX; + continue; + } + } + else + reg_equiv_init (i) = NULL_RTX; + } + } + + if (dump_file) + for (i = FIRST_PSEUDO_REGISTER; i < max_regno; i++) + if (reg_equiv_init (i)) + { + fprintf (dump_file, "init_insns for %u: ", i); + print_inline_rtx (dump_file, + reg_equiv_init (i), + 20); + fprintf (dump_file, "\n"); + } +} + + /* Yet another special case. Unfortunately, reg-stack forces people to write incorrect clobbers in asm statements. These clobbers must not cause the register to appear in bad_spill_regs, otherwise we'll call @@ -2155,7 +2283,7 @@ delete_dead_insn (rtx insn) This is used so that all pseudos spilled from a given hard reg can share one stack slot. */ -static void +void alter_reg (int i, int from_reg, bool dont_share_p) { /* When outputting an inline function, this can happen @@ -3008,7 +3136,6 @@ elimination_effects (rtx x, enum machine_mode mem_mode) } else if (reg_renumber[regno] < 0 - && reg_equivs != 0 && reg_equiv_constant (regno) && ! function_invariant_p (reg_equiv_constant (regno))) elimination_effects (reg_equiv_constant (regno), mem_mode); @@ -4370,7 +4497,7 @@ finish_spills (int global) if (ira_reassign_pseudos (temp_pseudo_reg_arr, n, bad_spill_regs_global, pseudo_forbidden_regs, pseudo_previous_regs, - &spilled_pseudos)) + &spilled_pseudos, true)) something_changed = 1; } /* Fix up the register information in the insn chain. 
@@ -6149,6 +6276,55 @@ set_reload_reg (int i, int r) return 0; } +/* If OBJ is or references a reg in SPILL_REG_SEARCH, then add the register + to MY_SPILL_REG_ORDER (filling in from the end of the array since these + are less desirable spill registers). */ +static void +mark_spill_regs (rtx obj, HARD_REG_SET *spill_reg_search, + int *my_spill_reg_order, int *index) +{ + int regno; + + if (GET_CODE (obj) == SUBREG) + obj = SUBREG_REG (obj); + + if (GET_CODE (obj) != REG) + return; + + /* If this is a pseudo, see if the pseudo was allocated to a spill reg + or if the pseudo was recently reloaded into a spill reg. */ + if (REGNO (obj) >= FIRST_PSEUDO_REGISTER) + { + regno = reg_renumber [REGNO (obj)]; + if (regno == -1 && reg_last_reload_reg[REGNO (obj)]) + { + /* OBJ was reloaded into a hard reg, now see if the hard + reg still contains OBJ. If true, then we mark that we + found the hard reg (spill reg). If not, then we don't + care. */ + rtx tmp = reg_last_reload_reg [REGNO (obj)]; + if (reg_reloaded_contents[REGNO (tmp)] == (int)REGNO (obj)) + regno = REGNO (tmp); + else + regno = -1; + } + } + else + regno = REGNO (obj); + + if (regno == -1) + return; + + if (TEST_HARD_REG_BIT (*spill_reg_search, regno)) + { + /* We start filling in from the end of the array since the + first register we find is the worst and each we find is + progressively better. */ + my_spill_reg_order[--(*index)] = regno; + CLEAR_HARD_REG_BIT (*spill_reg_search, regno); + } +} + /* Find a spill register to use as a reload register for reload R. LAST_RELOAD is nonzero if this is the last reload for the insn being processed. @@ -6162,7 +6338,7 @@ static int allocate_reload_reg (struct insn_chain *chain ATTRIBUTE_UNUSED, int r, int last_reload) { - int i, pass, count; + int pass, count, regnum; /* If we put this reload ahead, thinking it is a group, then insist on finding a group. Otherwise we can grab a @@ -6179,6 +6355,125 @@ allocate_reload_reg (struct insn_chain *chain ATTRIBUTE_UNUSED, int r, int force_group = rld[r].nregs > 1 && ! last_reload; + /* The set of spill registers we have not yet seen while scaning + insns. */ + HARD_REG_SET spill_reg_search; + + /* Preferred ordering of hard regs for spilling in this insn. + + In the past we had a static order for selecting PILL_REGS and + we just used spill regs in a round-robin fashion (to improve + the possibility that a reload could be inherited). + + A much better method is to prefer the spill reg which is unused + for the longest period of time after the insn requiring reloads, + with some exceptions. This exposes more inheritance opportunities + and makes inheritance more stable/predictable (less dependent on + the last register which was used as a spill). */ + int insn_spill_reg_order[FIRST_PSEUDO_REGISTER]; + int n_spills_left = n_spills; + rtx insn; + + + memset (insn_spill_reg_order, -1, FIRST_PSEUDO_REGISTER * sizeof (int)); + COPY_HARD_REG_SET (spill_reg_search, used_spill_regs); + + /* Search forward from the insn needing reloads for all the registers + in SPILL_REG_SEARCH. Stop when we either find all the spill regs + or we hit the end of the basic block containing the insn needing + reloads (chain->insn). */ + if (n_spills > 1) + { + basic_block bb = BLOCK_FOR_INSN (chain->insn); + int num; + + /* It is reasonably common for a function to never call + allocate_reload_reg, so we want to avoid the overhead of + DF initialization in that case. So we lazily initialize it + here when we know we're going to use the DF information. 
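As a concrete picture of the preference order described above and assembled
below (an editor's toy model in plain C, not GCC code): spill regs met
sooner after the reload insn are pushed toward the end of the order, and
regs never seen in the scan window stay at the front.

  #include <stdio.h>

  int
  main (void)
  {
    const char *spill_regs[3] = { "r1", "r2", "r3" };
    const char *order[3];
    int seen[3] = { 0, 0, 0 };
    int worst = 3, best = 0;
    int i;
    // Suppose the forward scan meets r2 first, then r1, and never sees r3.
    int found[2] = { 1, 0 };

    for (i = 0; i < 2; i++)
      {
        order[--worst] = spill_regs[found[i]];
        seen[found[i]] = 1;
      }

    // Regs unused for the whole window are the best choices; they fill the
    // front of the array, mirroring the round-robin leftover loop.
    for (i = 0; i < 3; i++)
      if (!seen[i])
        order[best++] = spill_regs[i];

    printf ("%s %s %s\n", order[0], order[1], order[2]);  // prints: r3 r1 r2
    return 0;
  }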
*/ + if (!df_initialized) + { + df_initialized = true; + df_insn_rescan_all (); + } + + for (insn = chain->insn, num = 0; + insn && BLOCK_FOR_INSN (insn) == bb; + insn = NEXT_INSN (insn)) + { + df_ref *def_rec, *use_rec; + rtx set; + + if (!NONDEBUG_INSN_P (insn)) + continue; + + num++; + + /* If this is a reg-reg copy of the in-reload or out-reload + register, then ignore any hard regs referenced by the + other operand as using those regs for the reload reg + will often allow the copy to be eliminated. Ideally we'd + put those spill regs at the start of our list. */ + set = single_set (insn); + if (set + && REG_OR_SUBREG_P (SET_SRC (set)) + && REG_OR_SUBREG_P (SET_DEST (set)) + && ((rld[r].in + && REG_OR_SUBREG_P (rld[r].in) + && reg_overlap_mentioned_p (rld[r].in, SET_DEST (set))) + || (rld[r].out + && REG_OR_SUBREG_P (rld[r].out) + && reg_overlap_mentioned_p (rld[r].out, SET_SRC (set))))) + continue; + + /* Mark any spill regs that are used or set by INSN. */ + for (def_rec = DF_INSN_DEFS (insn); *def_rec; def_rec++) + mark_spill_regs (DF_REF_REG (*def_rec), &spill_reg_search, + insn_spill_reg_order, &n_spills_left); + + for (use_rec = DF_INSN_USES (insn); *use_rec; use_rec++) + mark_spill_regs (DF_REF_REG (*use_rec), &spill_reg_search, + insn_spill_reg_order, &n_spills_left); + + /* If there is only one reg left to find, it must be the most + desirable and we can stop scanning. */ + if (n_spills_left <= 1 + || num > PARAM_VALUE (PARAM_MAX_RELOAD_SEARCH_INSNS)) + break; + } + } + + /* If we didn't find all the spill regs, then use the old round robin + technique for the leftovers. The leftovers are actually better + spill regs than the ones found during the scan, so fill + INSN_SPILL_REG_ORDER from the start. */ + if (!hard_reg_set_empty_p (spill_reg_search)) + { + int i = last_spill_reg; + int spill_regs_found; + + for (spill_regs_found = 0, count = 0; + count < n_spills && spill_regs_found < n_spills_left; + count++) + { + int regnum; + + i++; + if (i >= n_spills) + i -= n_spills; + regnum = spill_regs[i]; + if (TEST_HARD_REG_BIT (spill_reg_search, regnum)) + { + insn_spill_reg_order[spill_regs_found++] = regnum; + CLEAR_HARD_REG_BIT (spill_reg_search, regnum); + } + } + } + + /* If there's anything left in SPILL_REG_SEARCH, something has gone + horribly wrong. Abort now to make debugging easier. */ + gcc_assert (hard_reg_set_empty_p (spill_reg_search)); + /* If we want a single register and haven't yet found one, take any reg in the right class and not in use. If we want a consecutive group, here is where we look for it. @@ -6200,17 +6495,11 @@ allocate_reload_reg (struct insn_chain *chain ATTRIBUTE_UNUSED, int r, equally, so that inherited reloads have a chance of leapfrogging each other. */ - i = last_spill_reg; - for (count = 0; count < n_spills; count++) { int rclass = (int) rld[r].rclass; - int regnum; - i++; - if (i >= n_spills) - i -= n_spills; - regnum = spill_regs[i]; + regnum = insn_spill_reg_order[count]; if ((reload_reg_free_p (regnum, rld[r].opnum, rld[r].when_needed) @@ -6279,10 +6568,10 @@ allocate_reload_reg (struct insn_chain *chain ATTRIBUTE_UNUSED, int r, if (count >= n_spills) return 0; - /* I is the index in SPILL_REG_RTX of the reload register we are to - allocate. Get an rtx for it and find its register number. */ - - return set_reload_reg (i, r); + /* SPILL_REG_ORDER[REGNO] is the index in SPILL_REG_RTX of the reload + register we are to allocate. Get an rtx for it and find its register + number. 
*/
+  return set_reload_reg (spill_reg_order[regnum], r);
 }
 
 /* Initialize all the tables needed to allocate reload registers.