diff options
52 files changed, 14707 insertions, 179 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 7aae9b1696d..2c24ea10052 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,220 @@ +2008-08-26 Vladimir Makarov <vmakarov@redhat.com> + + * ira-build.c, ira-color.c, ira-costs.c, ira.h, ira-lives.c, + ira.c, ira-conflicts.c, ira-emit.c, ira-int.h: New files. + + * doc/passes.texi: Describe IRA. + + * doc/tm.texi (IRA_COVER_CLASSES, + IRA_HARD_REGNO_ADD_COST_MULTIPLIER): Describe the new macros. + + * doc/invoke.texi (ira-max-loops-num): Describe the new parameter. + (-fira, -fira-algorithm, -fira-coalesce, -fno-ira-move-spills, + -fira-propagate-cost, -fno-ira-share-save-slots, + -fno-ira-share-spill-slots, -fira-verbose): Describe new options. + + * flags.h (ira_algorithm): New enumeration. + (flag_ira_algorithm, flag_ira_verbose): New external variable + declarations. + + * postreload.c (gate_handle_postreload): Don't do post reload + optimizations unless the reload is completed. + + * reload.c (push_reload, find_dummy_reload): Use DF_LR_OUT for + IRA. + + * tree-pass.h (pass_ira): New external variable declaration. + + * reload.h: Add 2008 to the Copyright. + + * cfgloopanal.c: Include params.h. + (estimate_reg_pressure_cost): Decrease cost for IRA optimization + mode. + + * params.h (IRA_MAX_LOOPS_NUM): New macro. + + * toplev.c (ira.h): New include. + (flag_ira_algorithm, flag_ira_verbose): New external variables. + (backend_init_target): Call ira_init. + (backend_init): Call ira_init_once. + (finalize): Call finish_ira_once. + + * toplev.h (flag_ira, flag_ira_coalesce, flag_ira_move_spills, + flag_ira_share_save_slots, flag_ira_share_spill_slots): New + external variables. + + * regs.h (contains_reg_of_mode, move_cost, may_move_in_cost, + may_move_out_cost): New external variable declarations. + (move_table): New typedef. + + * caller-save.c: Include headers output.h and ira.h. + (no_caller_save_reg_set): New global variable. + (save_slots_num, save_slots): New variables. 
+ (reg_save_code, reg_restore_code, add_stored_regs): Add + prototypes. + (init_caller_save): Set up no_caller_save_reg_set. + (init_save_areas): Reset save_slots_num. + (saved_hard_reg): New structure. + (hard_reg_map, saved_regs_num, all_saved_regs): New variables. + (initiate_saved_hard_regs, new_saved_hard_reg, + finish_saved_hard_regs, saved_hard_reg_compare_func): New + functions. + (setup_save_areas): Add code for sharing stack slots. + (all_blocks): New variable. + (save_call_clobbered_regs): Process pseudo-register too. + (mark_set_regs): Process pseudo-register too. + (insert_one_insn): Put the insn after bb note in a empty basic + block. Add insn check. + + * global.c (eliminable_regset): Make it external. + (mark_elimination): Use DF_LR_IN for IRA. + (pseudo_for_reload_consideration_p): New. + (build_insn_chain): Make it external. Don't ignore spilled + pseudos for IRA. Use pseudo_for_reload_consideration_p. + (gate_handle_global_alloc): New function. + (pass_global_alloc): Add the gate function. + + * opts.c (decode_options): Set up flag_ira. Print the warning for + -fira. + (common_handle_option): Process -fira-algorithm and -fira-verbose. + + * timevar.def (TV_IRA, TV_RELOAD): New passes. + + * regmove.c (regmove_optimize): Don't do replacement of output for + IRA. + + * hard-reg-set.h (no_caller_save_reg_set, reg_class_subclasses): + New external variable declarations. + + * local-alloc.c (update_equiv_regs): Make it external. Return + true if jump label rebuilding should be done. Rescan new_insn for + notes. + (gate_handle_local_alloc): New function. + (pass_local_alloc): Add the gate function. + + * alias.c (value_addr_p, stack_addr_p): New functions. + (nonoverlapping_memrefs_p): Use them for IRA. + + * common.opt (fira, fira-algorithm, fira-coalesce, + fira-move-spills, fira-share-save-slots, fira-share-spill-slots, + fira-verbose): New options. 
+ + * regclass.c (reg_class_subclasses, contains_reg_of_mode, + move_cost, may_move_in_cost, may_move_out_cost): Make the + variables external. + (move_table): Remove typedef. + (init_move_cost): Make it external. + (allocate_reg_info, resize_reg_info, setup_reg_classes): New + functions. + + * rtl.h (init_move_cost, allocate_reg_info, resize_reg_info, + setup_reg_classes): New function prototypes. + (eliminable_regset): New external variable declaration. + (build_insn_chain, update_equiv_regs): New function prototypes. + + * Makefile.in (IRA_INT_H): New definition. + (OBJS-common): Add ira.o, ira-build.o, ira-costs.o, + ira-conflicts.o, ira-color.o, ira-emit.o, and ira-lives.o. + (reload1.o, toplev.o): Add dependence on ira.h. + (cfgloopanal.o): Add PARAMS_H. + (caller-save.o): Add dependence on output.h and ira.h. + (ira.o, ira-build.o, ira-costs.o, ira-conflicts.o, ira-color.o, + ira-emit.o, ira-lives.o): New entries. + + * passes.c (pass_ira): New pass. + + * params.def (PARAM_IRA_MAX_LOOPS_NUM): New parameter. + + * reload1.c (ira.h): Include the header. + (changed_allocation_pseudos): New bitmap. + (init_reload): Initiate the bitmap. + (compute_use_by_pseudos): Permits spilled registers in FROM. + (temp_pseudo_reg_arr): New variable. + (reload): Allocate and free temp_pseudo_reg_arr. Sort pseudos for + IRA. Call alter_reg with the additional parameter. Don't clear + spilled_pseudos for IRA. Restore original insn chain for IRA. + Clear changed_allocation_pseudos at the end of reload. + (calculate_needs_all_insns): Call IRA's mark_memory_move_deletion. + (hard_regno_to_pseudo_regno): New variable. + (count_pseudo): Check spilled pseudos. Set up + hard_regno_to_pseudo_regno. + (count_spilled_pseudo): Check spilled pseudos. Update + hard_regno_to_pseudo_regno. + (find_reg): Use better_spill_reload_regno_p. Check + hard_regno_to_pseudo_regno. + (alter_reg): Set up spilled_pseudos. Add a new parameter. Add + code for IRA. 
+ (eliminate_regs_1): Use additional parameter for alter_reg. + (finish_spills): Set up pseudo_previous_regs only for spilled + pseudos. Call reassign_pseudos once for all spilled pseudos, pass + more arguments. Don't clear live_throughout and dead_or_set for + spilled pseudos. Use additional parameter for alter_reg. Call + mark_allocation_change. Set up changed_allocation_pseudos. + Remove sanity check. + (emit_input_reload_insns, delete_output_reload): Use additional + parameter for alter_reg. Call mark_allocation_change. + (substitute, gen_reload_chain_without_interm_reg_p): New + functions. + (reloads_conflict): Use gen_reload_chain_without_interm_reg_p. + + * testsuite/gcc.dg/20080410-1.c: New file. + + * config/s390/s390.h (IRA_COVER_CLASSES, + IRA_HARD_REGNO_ADD_COST_MULTIPLIER): Define. + + * config/sparc/sparc.h (IRA_COVER_CLASSES): New macro. + + * config/i386/i386.h (IRA_COVER_CLASSES): Ditto. + + * config/ia64/ia64.h (IRA_COVER_CLASSES): Ditto. + + * config/rs6000/rs6000.h (IRA_COVER_CLASSES): Ditto. + + * config/arm/arm.h (IRA_COVER_CLASSES): Ditto. + + * config/alpha/alpha.h (IRA_COVER_CLASSES): Ditto. + + 2008-08-24 Jeff Law <law@redhat.com> + * ira.c (setup_reg_class_intersect_union): Prefer smallest class + when ignoring unavailable registers. + + 2008-08-24 Jeff Law <law@redhat.com> + * ira-color.c (coalesced_pseudo_reg_slot_compare): Check + FRAME_GROWS_DOWNWARD and STACK_GROWS_DOWNWARD. + * ira.c (setup_eliminable_regset): Check stack_realign_needed. + * config/mn10300/mn10300.h (IRA_COVER_CLASSES): New macro. + + 2008-06-03 Steve Chamberlain <steve.chamberlain@gmail.com> + * ira-build.c (allocno_range_compare_func): Stabilize sort. + + 2008-05-29 Andy Hutchinson <hutchinsonandy@aim.com> + * config/avr/avr.h (IRA_COVER_CLASSES): New macro. + * reload1.c (find_reg): Process registers in register allocation order. + + 2008-05-10 Richard Sandiford <rsandifo@nildram.co.uk> + * toplev.c (backend_init_target): Move ira_init call from + here... 
+ (lang_dependent_init_target): ...to here. + + 2008-05-10 Richard Sandiford <rsandifo@nildram.co.uk> + * ira.c (setup_class_subset_and_memory_move_costs): Don't + calculate memory move costs for NO_REGS. + + 2008-05-05 Kaz Kojima <kkojima@gcc.gnu.org> + * ira-color.c (ira_fast_allocation): Use no_stack_reg_p only if + STACK_REGS is defined. + + 2008-04-08 Andrew Pinski <andrew_pinski@playstation.sony.com> + * config/spu/spu.h (IRA_COVER_CLASSES): New macro. + + 2008-04-04 Bernd Schmidt <bernd.schmidt@analog.com> + * config/bfin/bfin.h (IRA_COVER_CLASSES): New macro. + + 2008-04-04 Kaz Kojima <kkojima@gcc.gnu.org> + * config/sh/sh.h (IRA_COVER_CLASSES): Define. + * config/sh/sh.md (movsicc_true+3): Check if emit returns a + barrier. + 2008-08-26 Victor Kaplansky <victork@il.ibm.com> Dorit Nuzman <dorit@il.ibm.com> diff --git a/gcc/Makefile.in b/gcc/Makefile.in index e50c2d5600d..8edcd949323 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -849,6 +849,7 @@ TREE_DATA_REF_H = tree-data-ref.h $(LAMBDA_H) omega.h graphds.h tree-chrec.h VARRAY_H = varray.h $(MACHMODE_H) $(SYSTEM_H) coretypes.h $(TM_H) TREE_INLINE_H = tree-inline.h $(VARRAY_H) pointer-set.h REAL_H = real.h $(MACHMODE_H) +IRA_INT_H = ira.h ira-int.h $(CFGLOOP_H) alloc-pool.h DBGCNT_H = dbgcnt.h dbgcnt.def EBIMAP_H = ebitmap.h sbitmap.h IPA_PROP_H = ipa-prop.h $(TREE_H) vec.h $(CGRAPH_H) @@ -1097,6 +1098,13 @@ OBJS-common = \ init-regs.o \ integrate.o \ intl.o \ + ira.o \ + ira-build.o \ + ira-costs.o \ + ira-conflicts.o \ + ira-color.o \ + ira-emit.o \ + ira-lives.o \ jump.o \ lambda-code.o \ lambda-mat.o \ @@ -2408,7 +2416,7 @@ toplev.o : toplev.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(TREE_H) \ $(INSN_ATTR_H) output.h $(DIAGNOSTIC_H) debug.h insn-config.h intl.h \ $(RECOG_H) Makefile $(TOPLEV_H) dwarf2out.h sdbout.h dbxout.h $(EXPR_H) \ hard-reg-set.h $(BASIC_BLOCK_H) graph.h except.h $(REGS_H) $(TIMEVAR_H) \ - value-prof.h $(PARAMS_H) $(TM_P_H) reload.h dwarf2asm.h $(TARGET_H) \ + 
value-prof.h $(PARAMS_H) $(TM_P_H) reload.h ira.h dwarf2asm.h $(TARGET_H) \ langhooks.h insn-flags.h $(CFGLAYOUT_H) $(CFGLOOP_H) hosthooks.h \ $(CGRAPH_H) $(COVERAGE_H) alloc-pool.h $(GGC_H) $(INTEGRATE_H) \ opts.h params.def tree-mudflap.h $(REAL_H) tree-pass.h $(GIMPLE_H) @@ -2771,7 +2779,7 @@ cfgloop.o : cfgloop.c $(CONFIG_H) $(SYSTEM_H) $(RTL_H) coretypes.h $(TM_H) \ $(GGC_H) cfgloopanal.o : cfgloopanal.c $(CONFIG_H) $(SYSTEM_H) $(RTL_H) \ $(BASIC_BLOCK_H) hard-reg-set.h $(CFGLOOP_H) $(EXPR_H) coretypes.h $(TM_H) \ - $(OBSTACK_H) output.h graphds.h + $(OBSTACK_H) output.h graphds.h $(PARAMS_H) graphds.o : graphds.c graphds.h $(CONFIG_H) $(SYSTEM_H) $(BITMAP_H) $(OBSTACK_H) \ coretypes.h vec.h vecprim.h loop-iv.o : loop-iv.c $(CONFIG_H) $(SYSTEM_H) $(RTL_H) $(BASIC_BLOCK_H) \ @@ -2835,7 +2843,7 @@ reload1.o : reload1.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) \ $(EXPR_H) $(OPTABS_H) reload.h $(REGS_H) hard-reg-set.h insn-config.h \ $(BASIC_BLOCK_H) $(RECOG_H) output.h $(FUNCTION_H) $(TOPLEV_H) $(TM_P_H) \ addresses.h except.h $(TREE_H) $(REAL_H) $(FLAGS_H) $(MACHMODE_H) \ - $(OBSTACK_H) $(DF_H) $(TARGET_H) dse.h + $(OBSTACK_H) $(DF_H) $(TARGET_H) dse.h ira.h rtlhooks.o : rtlhooks.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) \ rtlhooks-def.h $(EXPR_H) $(RECOG_H) postreload.o : postreload.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \ @@ -2851,7 +2859,7 @@ postreload-gcse.o : postreload-gcse.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \ caller-save.o : caller-save.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) \ $(FLAGS_H) $(REGS_H) hard-reg-set.h insn-config.h $(BASIC_BLOCK_H) $(FUNCTION_H) \ addresses.h $(RECOG_H) reload.h $(EXPR_H) $(TOPLEV_H) $(TM_P_H) $(DF_H) \ - gt-caller-save.h $(GGC_H) + output.h ira.h gt-caller-save.h $(GGC_H) bt-load.o : bt-load.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) except.h \ $(RTL_H) hard-reg-set.h $(REGS_H) $(TM_P_H) $(FIBHEAP_H) output.h $(EXPR_H) \ $(TARGET_H) $(FLAGS_H) $(INSN_ATTR_H) $(FUNCTION_H) 
tree-pass.h $(TOPLEV_H) \ @@ -2872,6 +2880,37 @@ stack-ptr-mod.o : stack-ptr-mod.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \ init-regs.o : init-regs.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \ $(TM_H) $(TREE_H) $(RTL_H) $(REGS_H) $(EXPR_H) tree-pass.h \ $(BASIC_BLOCK_H) $(FLAGS_H) $(DF_H) +ira-build.o: ira-build.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \ + $(TARGET_H) $(RTL_H) $(REGS_H) hard-reg-set.h $(FLAGS_H) \ + insn-config.h $(RECOG_H) $(BASIC_BLOCK_H) $(TOPLEV_H) $(TM_P_H) \ + $(PARAMS_H) $(DF_H) sparseset.h $(IRA_INT_H) +ira-costs.o: ira-costs.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \ + $(TARGET_H) $(RTL_H) insn-config.h $(RECOG_H) \ + $(REGS_H) hard-reg-set.h $(FLAGS_H) errors.h \ + $(EXPR_H) $(BASIC_BLOCK_H) $(TM_P_H) \ + $(IRA_INT_H) +ira-conflicts.o: ira-conflicts.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \ + $(TARGET_H) $(RTL_H) $(REGS_H) hard-reg-set.h $(FLAGS_H) \ + insn-config.h $(RECOG_H) $(BASIC_BLOCK_H) $(TOPLEV_H) $(TM_P_H) $(PARAMS_H) \ + $(DF_H) sparseset.h $(IRA_INT_H) +ira-color.o: ira-color.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \ + $(TARGET_H) $(RTL_H) $(REGS_H) hard-reg-set.h $(FLAGS_H) \ + $(EXPR_H) $(BASIC_BLOCK_H) $(TOPLEV_H) $(TM_P_H) $(PARAMS_H) \ + $(DF_H) $(SPLAY_TREE_H) $(IRA_INT_H) +ira-emit.o: ira-emit.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \ + $(TARGET_H) $(RTL_H) $(REGS_H) hard-reg-set.h $(FLAGS_H) \ + $(EXPR_H) $(BASIC_BLOCK_H) $(TOPLEV_H) $(TM_P_H) $(PARAMS_H) \ + $(IRA_INT_H) +ira-lives.o: ira-lives.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \ + $(TARGET_H) $(RTL_H) $(REGS_H) hard-reg-set.h $(FLAGS_H) \ + insn-config.h $(RECOG_H) $(BASIC_BLOCK_H) $(TOPLEV_H) $(TM_P_H) $(PARAMS_H) \ + $(DF_H) sparseset.h $(IRA_INT_H) +ira.o: ira.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \ + $(TARGET_H) $(TM_H) $(RTL_H) $(RECOG_H) \ + $(REGS_H) hard-reg-set.h $(FLAGS_H) $(OBSTACK_H) \ + $(EXPR_H) $(BASIC_BLOCK_H) $(TOPLEV_H) $(TM_P_H) \ + $(DF_H) $(IRA_INT_H) $(PARAMS_H) $(TIMEVAR_H) $(INTEGRATE_H) \ + tree-pass.h output.h 
regmove.o : regmove.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) \ insn-config.h $(TIMEVAR_H) tree-pass.h $(DF_H)\ $(RECOG_H) output.h $(REGS_H) hard-reg-set.h $(FLAGS_H) $(FUNCTION_H) \ diff --git a/gcc/alias.c b/gcc/alias.c index 684205cd89e..56660ec38d9 100644 --- a/gcc/alias.c +++ b/gcc/alias.c @@ -1975,6 +1975,34 @@ adjust_offset_for_component_ref (tree x, rtx offset) return GEN_INT (ioffset); } +/* The function returns nonzero if X is an address containg VALUE. */ +static int +value_addr_p (rtx x) +{ + if (GET_CODE (x) == VALUE) + return 1; + if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == VALUE) + return 1; + return 0; +} + +/* The function returns nonzero if X is a stack address. */ +static int +stack_addr_p (rtx x) +{ + if (x == hard_frame_pointer_rtx || x == frame_pointer_rtx + || x == arg_pointer_rtx || x == stack_pointer_rtx) + return 1; + if (GET_CODE (x) == PLUS + && (XEXP (x, 0) == hard_frame_pointer_rtx + || XEXP (x, 0) == frame_pointer_rtx + || XEXP (x, 0) == arg_pointer_rtx + || XEXP (x, 0) == stack_pointer_rtx) + && CONSTANT_P (XEXP (x, 1))) + return 1; + return 0; +} + /* Return nonzero if we can determine the exprs corresponding to memrefs X and Y and they do not overlap. */ @@ -1984,9 +2012,27 @@ nonoverlapping_memrefs_p (const_rtx x, const_rtx y) tree exprx = MEM_EXPR (x), expry = MEM_EXPR (y); rtx rtlx, rtly; rtx basex, basey; + rtx x_addr, y_addr; rtx moffsetx, moffsety; HOST_WIDE_INT offsetx = 0, offsety = 0, sizex, sizey, tem; + if (flag_ira && optimize && reload_completed) + { + /* We need this code for IRA because of stack slot sharing. RTL + in decl can be different than RTL used in insns. It is a + safe code although it can be conservative sometime. 
*/ + x_addr = canon_rtx (get_addr (XEXP (x, 0))); + y_addr = canon_rtx (get_addr (XEXP (y, 0))); + + if (value_addr_p (x_addr) || value_addr_p (y_addr)) + return 0; + + if (stack_addr_p (x_addr) && stack_addr_p (y_addr) + && memrefs_conflict_p (SIZE_FOR_MODE (y), y_addr, + SIZE_FOR_MODE (x), x_addr, 0)) + return 0; + } + /* Unless both have exprs, we can't tell anything. */ if (exprx == 0 || expry == 0) return 0; diff --git a/gcc/caller-save.c b/gcc/caller-save.c index e3d76c654be..ee8a0dc6631 100644 --- a/gcc/caller-save.c +++ b/gcc/caller-save.c @@ -1,6 +1,6 @@ /* Save and restore call-clobbered registers which are live across a call. Copyright (C) 1989, 1992, 1994, 1995, 1997, 1998, 1999, 2000, - 2001, 2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc. + 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc. This file is part of GCC. @@ -35,9 +35,14 @@ along with GCC; see the file COPYING3. If not see #include "toplev.h" #include "tm_p.h" #include "addresses.h" +#include "output.h" #include "df.h" #include "ggc.h" +/* Call used hard registers which can not be saved because there is no + insn for this. */ +HARD_REG_SET no_caller_save_reg_set; + #ifndef MAX_MOVE_MAX #define MAX_MOVE_MAX MOVE_MAX #endif @@ -62,6 +67,12 @@ static enum machine_mode static rtx regno_save_mem[FIRST_PSEUDO_REGISTER][MAX_MOVE_MAX / MIN_UNITS_PER_WORD + 1]; +/* The number of elements in the subsequent array. */ +static int save_slots_num; + +/* Allocated slots so far. */ +static rtx save_slots[FIRST_PSEUDO_REGISTER]; + /* We will only make a register eligible for caller-save if it can be saved in its widest mode with a simple SET insn as long as the memory address is valid. 
We record the INSN_CODE is those insns here since @@ -86,7 +97,17 @@ static int n_regs_saved; static HARD_REG_SET referenced_regs; +static int reg_save_code (int, enum machine_mode); +static int reg_restore_code (int, enum machine_mode); + +struct saved_hard_reg; +static void initiate_saved_hard_regs (void); +static struct saved_hard_reg *new_saved_hard_reg (int, int); +static void finish_saved_hard_regs (void); +static int saved_hard_reg_compare_func (const void *, const void *); + static void mark_set_regs (rtx, const_rtx, void *); +static void add_stored_regs (rtx, const_rtx, void *); static void mark_referenced_regs (rtx); static int insert_save (struct insn_chain *, int, int, HARD_REG_SET *, enum machine_mode *); @@ -95,7 +116,9 @@ static int insert_restore (struct insn_chain *, int, int, int, static struct insn_chain *insert_one_insn (struct insn_chain *, int, int, rtx); static void add_stored_regs (rtx, const_rtx, void *); + + static GTY(()) rtx savepat; static GTY(()) rtx restpat; static GTY(()) rtx test_reg; @@ -180,6 +203,7 @@ init_caller_save (void) rtx address; int i, j; + CLEAR_HARD_REG_SET (no_caller_save_reg_set); /* First find all the registers that we need to deal with and all the modes that they can have. If we can't find a mode to use, we can't have the register live over calls. */ @@ -217,7 +241,7 @@ init_caller_save (void) for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) if (TEST_HARD_REG_BIT (reg_class_contents - [(int) base_reg_class (regno_save_mode [i][1], PLUS, CONST_INT)], i)) + [(int) base_reg_class (regno_save_mode[i][1], PLUS, CONST_INT)], i)) break; gcc_assert (i < FIRST_PSEUDO_REGISTER); @@ -264,10 +288,14 @@ init_caller_save (void) { call_fixed_regs[i] = 1; SET_HARD_REG_BIT (call_fixed_reg_set, i); + if (call_used_regs[i]) + SET_HARD_REG_BIT (no_caller_save_reg_set, i); } } } + + /* Initialize save areas by showing that we haven't allocated any yet. 
*/ void @@ -278,6 +306,100 @@ init_save_areas (void) for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) for (j = 1; j <= MOVE_MAX_WORDS; j++) regno_save_mem[i][j] = 0; + save_slots_num = 0; + +} + +/* The structure represents a hard register which should be saved + through the call. It is used when the integrated register + allocator (IRA) is used and sharing save slots is on. */ +struct saved_hard_reg +{ + /* Order number starting with 0. */ + int num; + /* The hard regno. */ + int hard_regno; + /* Execution frequency of all calls through which given hard + register should be saved. */ + int call_freq; + /* Stack slot reserved to save the hard register through calls. */ + rtx slot; + /* True if it is first hard register in the chain of hard registers + sharing the same stack slot. */ + int first_p; + /* Order number of the next hard register structure with the same + slot in the chain. -1 represents end of the chain. */ + int next; +}; + +/* Map: hard register number to the corresponding structure. */ +static struct saved_hard_reg *hard_reg_map[FIRST_PSEUDO_REGISTER]; + +/* The number of all structures representing hard registers should be + saved, in order words, the number of used elements in the following + array. */ +static int saved_regs_num; + +/* Pointers to all the structures. Index is the order number of the + corresponding structure. */ +static struct saved_hard_reg *all_saved_regs[FIRST_PSEUDO_REGISTER]; + +/* First called function for work with saved hard registers. */ +static void +initiate_saved_hard_regs (void) +{ + int i; + + saved_regs_num = 0; + for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) + hard_reg_map[i] = NULL; +} + +/* Allocate and return new saved hard register with given REGNO and + CALL_FREQ. 
*/ +static struct saved_hard_reg * +new_saved_hard_reg (int regno, int call_freq) +{ + struct saved_hard_reg *saved_reg; + + saved_reg + = (struct saved_hard_reg *) xmalloc (sizeof (struct saved_hard_reg)); + hard_reg_map[regno] = all_saved_regs[saved_regs_num] = saved_reg; + saved_reg->num = saved_regs_num++; + saved_reg->hard_regno = regno; + saved_reg->call_freq = call_freq; + saved_reg->first_p = FALSE; + saved_reg->next = -1; + return saved_reg; +} + +/* Free memory allocated for the saved hard registers. */ +static void +finish_saved_hard_regs (void) +{ + int i; + + for (i = 0; i < saved_regs_num; i++) + free (all_saved_regs[i]); +} + +/* The function is used to sort the saved hard register structures + according their frequency. */ +static int +saved_hard_reg_compare_func (const void *v1p, const void *v2p) +{ + const struct saved_hard_reg *p1 = *(struct saved_hard_reg * const *) v1p; + const struct saved_hard_reg *p2 = *(struct saved_hard_reg * const *) v2p; + + if (flag_omit_frame_pointer) + { + if (p1->call_freq - p2->call_freq != 0) + return p1->call_freq - p2->call_freq; + } + else if (p2->call_freq - p1->call_freq != 0) + return p2->call_freq - p1->call_freq; + + return p1->num - p2->num; } /* Allocate save areas for any hard registers that might need saving. @@ -286,6 +408,10 @@ init_save_areas (void) overestimate slightly (especially if some of these registers are later used as spill registers), but it should not be significant. + For IRA we use priority coloring to decrease stack slots needed for + saving hard registers through calls. We build conflicts for them + to do coloring. 
+ Future work: In the fallback case we should iterate backwards across all possible @@ -317,65 +443,297 @@ setup_save_areas (void) unsigned int regno = reg_renumber[i]; unsigned int endregno = end_hard_regno (GET_MODE (regno_reg_rtx[i]), regno); - for (r = regno; r < endregno; r++) if (call_used_regs[r]) SET_HARD_REG_BIT (hard_regs_used, r); } - /* Now run through all the call-used hard-registers and allocate - space for them in the caller-save area. Try to allocate space - in a manner which allows multi-register saves/restores to be done. */ - - for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) - for (j = MOVE_MAX_WORDS; j > 0; j--) - { - int do_save = 1; - - /* If no mode exists for this size, try another. Also break out - if we have already saved this hard register. */ - if (regno_save_mode[i][j] == VOIDmode || regno_save_mem[i][1] != 0) - continue; - - /* See if any register in this group has been saved. */ - for (k = 0; k < j; k++) - if (regno_save_mem[i + k][1]) + if (flag_ira && optimize && flag_ira_share_save_slots) + { + rtx insn, slot; + struct insn_chain *chain, *next; + char *saved_reg_conflicts; + unsigned int regno; + int next_k, freq; + struct saved_hard_reg *saved_reg, *saved_reg2, *saved_reg3; + int call_saved_regs_num; + struct saved_hard_reg *call_saved_regs[FIRST_PSEUDO_REGISTER]; + HARD_REG_SET hard_regs_to_save, used_regs, this_insn_sets; + reg_set_iterator rsi; + int best_slot_num; + int prev_save_slots_num; + rtx prev_save_slots[FIRST_PSEUDO_REGISTER]; + + initiate_saved_hard_regs (); + /* Create hard reg saved regs. */ + for (chain = reload_insn_chain; chain != 0; chain = next) + { + insn = chain->insn; + next = chain->next; + if (GET_CODE (insn) != CALL_INSN + || find_reg_note (insn, REG_NORETURN, NULL)) + continue; + freq = REG_FREQ_FROM_BB (BLOCK_FOR_INSN (insn)); + REG_SET_TO_HARD_REG_SET (hard_regs_to_save, + &chain->live_throughout); + COPY_HARD_REG_SET (used_regs, call_used_reg_set); + + /* Record all registers set in this call insn. 
These don't + need to be saved. N.B. the call insn might set a subreg + of a multi-hard-reg pseudo; then the pseudo is considered + live during the call, but the subreg that is set + isn't. */ + CLEAR_HARD_REG_SET (this_insn_sets); + note_stores (PATTERN (insn), mark_set_regs, &this_insn_sets); + /* Sibcalls are considered to set the return value. */ + if (SIBLING_CALL_P (insn) && crtl->return_rtx) + mark_set_regs (crtl->return_rtx, NULL_RTX, &this_insn_sets); + + AND_COMPL_HARD_REG_SET (used_regs, call_fixed_reg_set); + AND_COMPL_HARD_REG_SET (used_regs, this_insn_sets); + AND_HARD_REG_SET (hard_regs_to_save, used_regs); + for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) + if (TEST_HARD_REG_BIT (hard_regs_to_save, regno)) + { + if (hard_reg_map[regno] != NULL) + hard_reg_map[regno]->call_freq += freq; + else + saved_reg = new_saved_hard_reg (regno, freq); + } + /* Look through all live pseudos, mark their hard registers. */ + EXECUTE_IF_SET_IN_REG_SET + (&chain->live_throughout, FIRST_PSEUDO_REGISTER, regno, rsi) { - do_save = 0; - break; + int r = reg_renumber[regno]; + int bound; + + if (r < 0) + continue; + + bound = r + hard_regno_nregs[r][PSEUDO_REGNO_MODE (regno)]; + for (; r < bound; r++) + if (TEST_HARD_REG_BIT (used_regs, r)) + { + if (hard_reg_map[r] != NULL) + hard_reg_map[r]->call_freq += freq; + else + saved_reg = new_saved_hard_reg (r, freq); + SET_HARD_REG_BIT (hard_regs_to_save, r); + } } - if (! do_save) - continue; + } + /* Find saved hard register conflicts. 
*/ + saved_reg_conflicts = (char *) xmalloc (saved_regs_num * saved_regs_num); + memset (saved_reg_conflicts, 0, saved_regs_num * saved_regs_num); + for (chain = reload_insn_chain; chain != 0; chain = next) + { + call_saved_regs_num = 0; + insn = chain->insn; + next = chain->next; + if (GET_CODE (insn) != CALL_INSN + || find_reg_note (insn, REG_NORETURN, NULL)) + continue; + REG_SET_TO_HARD_REG_SET (hard_regs_to_save, + &chain->live_throughout); + COPY_HARD_REG_SET (used_regs, call_used_reg_set); + + /* Record all registers set in this call insn. These don't + need to be saved. N.B. the call insn might set a subreg + of a multi-hard-reg pseudo; then the pseudo is considered + live during the call, but the subreg that is set + isn't. */ + CLEAR_HARD_REG_SET (this_insn_sets); + note_stores (PATTERN (insn), mark_set_regs, &this_insn_sets); + /* Sibcalls are considered to set the return value, + compare flow.c:propagate_one_insn. */ + if (SIBLING_CALL_P (insn) && crtl->return_rtx) + mark_set_regs (crtl->return_rtx, NULL_RTX, &this_insn_sets); + + AND_COMPL_HARD_REG_SET (used_regs, call_fixed_reg_set); + AND_COMPL_HARD_REG_SET (used_regs, this_insn_sets); + AND_HARD_REG_SET (hard_regs_to_save, used_regs); + for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) + if (TEST_HARD_REG_BIT (hard_regs_to_save, regno)) + { + gcc_assert (hard_reg_map[regno] != NULL); + call_saved_regs[call_saved_regs_num++] = hard_reg_map[regno]; + } + /* Look through all live pseudos, mark their hard registers. */ + EXECUTE_IF_SET_IN_REG_SET + (&chain->live_throughout, FIRST_PSEUDO_REGISTER, regno, rsi) + { + int r = reg_renumber[regno]; + int bound; + + if (r < 0) + continue; - for (k = 0; k < j; k++) - if (! 
TEST_HARD_REG_BIT (hard_regs_used, i + k)) + bound = r + hard_regno_nregs[r][PSEUDO_REGNO_MODE (regno)]; + for (; r < bound; r++) + if (TEST_HARD_REG_BIT (used_regs, r)) + call_saved_regs[call_saved_regs_num++] = hard_reg_map[r]; + } + for (i = 0; i < call_saved_regs_num; i++) { - do_save = 0; - break; + saved_reg = call_saved_regs[i]; + for (j = 0; j < call_saved_regs_num; j++) + if (i != j) + { + saved_reg2 = call_saved_regs[j]; + saved_reg_conflicts[saved_reg->num * saved_regs_num + + saved_reg2->num] + = saved_reg_conflicts[saved_reg2->num * saved_regs_num + + saved_reg->num] + = TRUE; + } } - if (! do_save) - continue; - - /* We have found an acceptable mode to store in. Since hard - register is always saved in the widest mode available, - the mode may be wider than necessary, it is OK to reduce - the alignment of spill space. We will verify that it is - equal to or greater than required when we restore and save - the hard register in insert_restore and insert_save. */ - regno_save_mem[i][j] - = assign_stack_local_1 (regno_save_mode[i][j], - GET_MODE_SIZE (regno_save_mode[i][j]), - 0, true); - - /* Setup single word save area just in case... */ - for (k = 0; k < j; k++) - /* This should not depend on WORDS_BIG_ENDIAN. - The order of words in regs is the same as in memory. */ - regno_save_mem[i + k][1] - = adjust_address_nv (regno_save_mem[i][j], - regno_save_mode[i + k][1], - k * UNITS_PER_WORD); - } + } + /* Sort saved hard regs. */ + qsort (all_saved_regs, saved_regs_num, sizeof (struct saved_hard_reg *), + saved_hard_reg_compare_func); + /* Initiate slots available from the previous reload + iteration. */ + prev_save_slots_num = save_slots_num; + memcpy (prev_save_slots, save_slots, save_slots_num * sizeof (rtx)); + save_slots_num = 0; + /* Allocate stack slots for the saved hard registers. 
*/ + for (i = 0; i < saved_regs_num; i++) + { + saved_reg = all_saved_regs[i]; + regno = saved_reg->hard_regno; + for (j = 0; j < i; j++) + { + saved_reg2 = all_saved_regs[j]; + if (! saved_reg2->first_p) + continue; + slot = saved_reg2->slot; + for (k = j; k >= 0; k = next_k) + { + saved_reg3 = all_saved_regs[k]; + next_k = saved_reg3->next; + if (saved_reg_conflicts[saved_reg->num * saved_regs_num + + saved_reg3->num]) + break; + } + if (k < 0 + && (GET_MODE_SIZE (regno_save_mode[regno][1]) + <= GET_MODE_SIZE (regno_save_mode + [saved_reg2->hard_regno][1]))) + { + saved_reg->slot + = adjust_address_nv + (slot, regno_save_mode[saved_reg->hard_regno][1], 0); + regno_save_mem[regno][1] = saved_reg->slot; + saved_reg->next = saved_reg2->next; + saved_reg2->next = i; + if (dump_file != NULL) + fprintf (dump_file, "%d uses slot of %d\n", + regno, saved_reg2->hard_regno); + break; + } + } + if (j == i) + { + saved_reg->first_p = TRUE; + for (best_slot_num = -1, j = 0; j < prev_save_slots_num; j++) + { + slot = prev_save_slots[j]; + if (slot == NULL_RTX) + continue; + if (GET_MODE_SIZE (regno_save_mode[regno][1]) + <= GET_MODE_SIZE (GET_MODE (slot)) + && best_slot_num < 0) + best_slot_num = j; + if (GET_MODE (slot) == regno_save_mode[regno][1]) + break; + } + if (best_slot_num >= 0) + { + saved_reg->slot = prev_save_slots[best_slot_num]; + saved_reg->slot + = adjust_address_nv + (saved_reg->slot, + regno_save_mode[saved_reg->hard_regno][1], 0); + if (dump_file != NULL) + fprintf (dump_file, + "%d uses a slot from prev iteration\n", regno); + prev_save_slots[best_slot_num] = NULL_RTX; + if (best_slot_num + 1 == prev_save_slots_num) + prev_save_slots_num--; + } + else + { + saved_reg->slot + = assign_stack_local_1 + (regno_save_mode[regno][1], + GET_MODE_SIZE (regno_save_mode[regno][1]), 0, true); + if (dump_file != NULL) + fprintf (dump_file, "%d uses a new slot\n", regno); + } + regno_save_mem[regno][1] = saved_reg->slot; + save_slots[save_slots_num++] = saved_reg->slot; 
+ } + } + free (saved_reg_conflicts); + finish_saved_hard_regs (); + } + else + { + /* Now run through all the call-used hard-registers and allocate + space for them in the caller-save area. Try to allocate space + in a manner which allows multi-register saves/restores to be done. */ + + for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) + for (j = MOVE_MAX_WORDS; j > 0; j--) + { + int do_save = 1; + + /* If no mode exists for this size, try another. Also break out + if we have already saved this hard register. */ + if (regno_save_mode[i][j] == VOIDmode || regno_save_mem[i][1] != 0) + continue; + + /* See if any register in this group has been saved. */ + for (k = 0; k < j; k++) + if (regno_save_mem[i + k][1]) + { + do_save = 0; + break; + } + if (! do_save) + continue; + + for (k = 0; k < j; k++) + if (! TEST_HARD_REG_BIT (hard_regs_used, i + k)) + { + do_save = 0; + break; + } + if (! do_save) + continue; + + /* We have found an acceptable mode to store in. Since + hard register is always saved in the widest mode + available, the mode may be wider than necessary, it is + OK to reduce the alignment of spill space. We will + verify that it is equal to or greater than required + when we restore and save the hard register in + insert_restore and insert_save. */ + regno_save_mem[i][j] + = assign_stack_local_1 (regno_save_mode[i][j], + GET_MODE_SIZE (regno_save_mode[i][j]), + 0, true); + + /* Setup single word save area just in case... */ + for (k = 0; k < j; k++) + /* This should not depend on WORDS_BIG_ENDIAN. + The order of words in regs is the same as in memory. */ + regno_save_mem[i + k][1] + = adjust_address_nv (regno_save_mem[i][j], + regno_save_mode[i + k][1], + k * UNITS_PER_WORD); + } + } /* Now loop again and set the alias set of any save areas we made to the alias set used to represent frame objects. 
*/ @@ -384,7 +742,9 @@ setup_save_areas (void) if (regno_save_mem[i][j] != 0) set_mem_alias_set (regno_save_mem[i][j], get_frame_alias_set ()); } + + /* Find the places where hard regs are live across calls and save them. */ void @@ -461,7 +821,8 @@ save_call_clobbered_regs (void) int nregs; enum machine_mode mode; - gcc_assert (r >= 0); + if (r < 0) + continue; nregs = hard_regno_nregs[r][PSEUDO_REGNO_MODE (regno)]; mode = HARD_REGNO_CALLER_SAVE_MODE (r, nregs, PSEUDO_REGNO_MODE (regno)); @@ -497,7 +858,7 @@ save_call_clobbered_regs (void) } } - if (chain->next == 0 || chain->next->block > chain->block) + if (chain->next == 0 || chain->next->block != chain->block) { int regno; /* At the end of the basic block, we must restore any registers that @@ -713,7 +1074,8 @@ insert_restore (struct insn_chain *chain, int before_p, int regno, /* Verify that the alignment of spill space is equal to or greater than required. */ - gcc_assert (GET_MODE_ALIGNMENT (GET_MODE (mem)) <= MEM_ALIGN (mem)); + gcc_assert (MIN (MAX_SUPPORTED_STACK_ALIGNMENT, + GET_MODE_ALIGNMENT (GET_MODE (mem))) <= MEM_ALIGN (mem)); pat = gen_rtx_SET (VOIDmode, gen_rtx_REG (GET_MODE (mem), @@ -790,7 +1152,8 @@ insert_save (struct insn_chain *chain, int before_p, int regno, /* Verify that the alignment of spill space is equal to or greater than required. */ - gcc_assert (GET_MODE_ALIGNMENT (GET_MODE (mem)) <= MEM_ALIGN (mem)); + gcc_assert (MIN (MAX_SUPPORTED_STACK_ALIGNMENT, + GET_MODE_ALIGNMENT (GET_MODE (mem))) <= MEM_ALIGN (mem)); pat = gen_rtx_SET (VOIDmode, mem, gen_rtx_REG (GET_MODE (mem), diff --git a/gcc/cfgloopanal.c b/gcc/cfgloopanal.c index c00d1c501be..db5bd2a62b9 100644 --- a/gcc/cfgloopanal.c +++ b/gcc/cfgloopanal.c @@ -29,6 +29,7 @@ along with GCC; see the file COPYING3. If not see #include "expr.h" #include "output.h" #include "graphds.h" +#include "params.h" /* Checks whether BB is executed exactly once in each LOOP iteration. 
*/ @@ -372,6 +373,7 @@ init_set_costs (void) unsigned estimate_reg_pressure_cost (unsigned n_new, unsigned n_old) { + unsigned cost; unsigned regs_needed = n_new + n_old; /* If we have enough registers, we should use them and not restrict @@ -379,12 +381,25 @@ estimate_reg_pressure_cost (unsigned n_new, unsigned n_old) if (regs_needed + target_res_regs <= target_avail_regs) return 0; - /* If we are close to running out of registers, try to preserve them. */ if (regs_needed <= target_avail_regs) - return target_reg_cost * n_new; - - /* If we run out of registers, it is very expensive to add another one. */ - return target_spill_cost * n_new; + /* If we are close to running out of registers, try to preserve + them. */ + cost = target_reg_cost * n_new; + else + /* If we run out of registers, it is very expensive to add another + one. */ + cost = target_spill_cost * n_new; + + if (optimize && flag_ira && (flag_ira_algorithm == IRA_ALGORITHM_REGIONAL + || flag_ira_algorithm == IRA_ALGORITHM_MIXED) + && number_of_loops () <= (unsigned) IRA_MAX_LOOPS_NUM) + /* IRA regional allocation deals with high register pressure + better. So decrease the cost (to do more accurate the cost + calculation for IRA, we need to know how many registers lives + through the loop transparently). */ + cost /= 2; + + return cost; } /* Sets EDGE_LOOP_EXIT flag for all loop exits. */ diff --git a/gcc/common.opt b/gcc/common.opt index 523f71206c7..21d1ae3396c 100644 --- a/gcc/common.opt +++ b/gcc/common.opt @@ -653,6 +653,30 @@ Common Report Var(flag_ipa_struct_reorg) Perform structure layout optimizations based on profiling information. +fira +Common Report Var(flag_ira) Init(0) +Use integrated register allocator. + +fira-algorithm= +Common Joined RejectNegative +-fira-algorithm=[regional|CB|mixed] Set the used IRA algorithm + +fira-coalesce +Common Report Var(flag_ira_coalesce) Init(0) +Do optimistic coalescing. 
+ +fira-share-save-slots +Common Report Var(flag_ira_share_save_slots) Init(1) +Share slots for saving different hard registers. + +fira-share-spill-slots +Common Report Var(flag_ira_share_spill_slots) Init(1) +Share stack slots for spilled pseudo-registers. + +fira-verbose= +Common RejectNegative Joined UInteger +-fira-verbose=<number> Control IRA's level of diagnostic messages. + fivopts Common Report Var(flag_ivopts) Init(1) Optimization Optimize induction variables on trees diff --git a/gcc/config/alpha/alpha.h b/gcc/config/alpha/alpha.h index b37a19d1e44..4336e6c9357 100644 --- a/gcc/config/alpha/alpha.h +++ b/gcc/config/alpha/alpha.h @@ -553,6 +553,19 @@ enum reg_class { {0x00000000, 0x7fffffff}, /* FLOAT_REGS */ \ {0xffffffff, 0xffffffff} } +/* The following macro defines cover classes for Integrated Register + Allocator. Cover classes is a set of non-intersected register + classes covering all hard registers used for register allocation + purpose. Any move between two registers of a cover class should be + cheaper than load or store of the registers. The macro value is + array of register classes with LIM_REG_CLASSES used as the end + marker. */ + +#define IRA_COVER_CLASSES \ +{ \ + GENERAL_REGS, FLOAT_REGS, LIM_REG_CLASSES \ +} + /* The same information, inverted: Return the class number of the smallest class containing reg number REGNO. This could be a conditional expression diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h index d99f77d1247..fd5067adfd8 100644 --- a/gcc/config/arm/arm.h +++ b/gcc/config/arm/arm.h @@ -1185,6 +1185,20 @@ enum reg_class or could index an array. */ #define REGNO_REG_CLASS(REGNO) arm_regno_class (REGNO) +/* The following macro defines cover classes for Integrated Register + Allocator. Cover classes is a set of non-intersected register + classes covering all hard registers used for register allocation + purpose. Any move between two registers of a cover class should be + cheaper than load or store of the registers. 
The macro value is + array of register classes with LIM_REG_CLASSES used as the end + marker. */ + +#define IRA_COVER_CLASSES \ +{ \ + GENERAL_REGS, FPA_REGS, CIRRUS_REGS, VFP_REGS, IWMMXT_GR_REGS, IWMMXT_REGS,\ + LIM_REG_CLASSES \ +} + /* FPA registers can't do subreg as all values are reformatted to internal precision. VFP registers may only be accessed in the mode they were set. */ diff --git a/gcc/config/avr/avr.h b/gcc/config/avr/avr.h index 6a27c3b7edb..b5132e26313 100644 --- a/gcc/config/avr/avr.h +++ b/gcc/config/avr/avr.h @@ -291,6 +291,19 @@ enum reg_class { #define REGNO_REG_CLASS(R) avr_regno_reg_class(R) +/* The following macro defines cover classes for Integrated Register + Allocator. Cover classes is a set of non-intersected register + classes covering all hard registers used for register allocation + purpose. Any move between two registers of a cover class should be + cheaper than load or store of the registers. The macro value is + array of register classes with LIM_REG_CLASSES used as the end + marker. */ + +#define IRA_COVER_CLASSES \ +{ \ + GENERAL_REGS, LIM_REG_CLASSES \ +} + #define BASE_REG_CLASS (reload_completed ? BASE_POINTER_REGS : POINTER_REGS) #define INDEX_REG_CLASS NO_REGS diff --git a/gcc/config/bfin/bfin.h b/gcc/config/bfin/bfin.h index 6f2d16c98d6..826e60b9e02 100644 --- a/gcc/config/bfin/bfin.h +++ b/gcc/config/bfin/bfin.h @@ -711,6 +711,19 @@ enum reg_class : (REGNO) >= REG_RETS ? PROLOGUE_REGS \ : NO_REGS) +/* The following macro defines cover classes for Integrated Register + Allocator. Cover classes is a set of non-intersected register + classes covering all hard registers used for register allocation + purpose. Any move between two registers of a cover class should be + cheaper than load or store of the registers. The macro value is + array of register classes with LIM_REG_CLASSES used as the end + marker. 
*/ + +#define IRA_COVER_CLASSES \ +{ \ + MOST_REGS, AREGS, CCREGS, LIM_REG_CLASSES \ +} + /* When defined, the compiler allows registers explicitly used in the rtl to be used as spill registers but prevents the compiler from extending the lifetime of these registers. */ diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 23871561544..69c7472909f 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -1274,6 +1274,19 @@ enum reg_class { 0xffffffff,0x1fffff } \ } +/* The following macro defines cover classes for Integrated Register + Allocator. Cover classes is a set of non-intersected register + classes covering all hard registers used for register allocation + purpose. Any move between two registers of a cover class should be + cheaper than load or store of the registers. The macro value is + array of register classes with LIM_REG_CLASSES used as the end + marker. */ + +#define IRA_COVER_CLASSES \ +{ \ + GENERAL_REGS, FLOAT_REGS, MMX_REGS, SSE_REGS, LIM_REG_CLASSES \ +} + /* The same information, inverted: Return the class number of the smallest class containing reg number REGNO. This could be a conditional expression diff --git a/gcc/config/ia64/ia64.h b/gcc/config/ia64/ia64.h index 60934f2f38b..6fca6902408 100644 --- a/gcc/config/ia64/ia64.h +++ b/gcc/config/ia64/ia64.h @@ -800,6 +800,19 @@ enum reg_class 0xFFFFFFFF, 0xFFFFFFFF, 0x3FFF }, \ } +/* The following macro defines cover classes for Integrated Register + Allocator. Cover classes is a set of non-intersected register + classes covering all hard registers used for register allocation + purpose. Any move between two registers of a cover class should be + cheaper than load or store of the registers. The macro value is + array of register classes with LIM_REG_CLASSES used as the end + marker. 
*/ + +#define IRA_COVER_CLASSES \ +{ \ + PR_REGS, BR_REGS, AR_M_REGS, AR_I_REGS, GR_REGS, FR_REGS, LIM_REG_CLASSES \ +} + /* A C expression whose value is a register class containing hard register REGNO. In general there is more than one such class; choose a class which is "minimal", meaning that no smaller class also contains the register. */ diff --git a/gcc/config/mn10300/mn10300.h b/gcc/config/mn10300/mn10300.h index 1d2339b6b15..07035fbb06b 100644 --- a/gcc/config/mn10300/mn10300.h +++ b/gcc/config/mn10300/mn10300.h @@ -295,6 +295,19 @@ enum reg_class { { 0xffffffff, 0x3ffff } /* ALL_REGS */ \ } +/* The following macro defines cover classes for Integrated Register + Allocator. Cover classes is a set of non-intersected register + classes covering all hard registers used for register allocation + purpose. Any move between two registers of a cover class should be + cheaper than load or store of the registers. The macro value is + array of register classes with LIM_REG_CLASSES used as the end + marker. */ + +#define IRA_COVER_CLASSES \ +{ \ + GENERAL_REGS, FP_REGS, LIM_REG_CLASSES \ +} + /* The same information, inverted: Return the class number of the smallest class containing reg number REGNO. This could be a conditional expression diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h index 1f6d07b8c06..048d163ff14 100644 --- a/gcc/config/rs6000/rs6000.h +++ b/gcc/config/rs6000/rs6000.h @@ -1128,6 +1128,22 @@ enum reg_class { 0xffffffff, 0xffffffff, 0xffffffff, 0x0003ffff } /* ALL_REGS */ \ } +/* The following macro defines cover classes for Integrated Register + Allocator. Cover classes is a set of non-intersected register + classes covering all hard registers used for register allocation + purpose. Any move between two registers of a cover class should be + cheaper than load or store of the registers. The macro value is + array of register classes with LIM_REG_CLASSES used as the end + marker. 
*/ + +#define IRA_COVER_CLASSES \ +{ \ + GENERAL_REGS, SPECIAL_REGS, FLOAT_REGS, ALTIVEC_REGS, \ + /*VRSAVE_REGS,*/ VSCR_REGS, SPE_ACC_REGS, SPEFSCR_REGS, \ + /* MQ_REGS, LINK_REGS, CTR_REGS, */ \ + CR_REGS, XER_REGS, LIM_REG_CLASSES \ +} + /* The same information, inverted: Return the class number of the smallest class containing reg number REGNO. This could be a conditional expression diff --git a/gcc/config/s390/s390.h b/gcc/config/s390/s390.h index a8cb4774969..b96f10026e7 100644 --- a/gcc/config/s390/s390.h +++ b/gcc/config/s390/s390.h @@ -478,6 +478,30 @@ enum reg_class { 0xffffffff, 0x0000003f }, /* ALL_REGS */ \ } +/* The following macro defines cover classes for Integrated Register + Allocator. Cover classes is a set of non-intersected register + classes covering all hard registers used for register allocation + purpose. Any move between two registers of a cover class should be + cheaper than load or store of the registers. The macro value is + array of register classes with LIM_REG_CLASSES used as the end + marker. */ + +#define IRA_COVER_CLASSES \ +{ \ + GENERAL_REGS, FP_REGS, CC_REGS, ACCESS_REGS, LIM_REG_CLASSES \ +} + +/* In some case register allocation order is not enough for IRA to + generate a good code. The following macro (if defined) increases + cost of REGNO for a pseudo approximately by pseudo usage frequency + multiplied by the macro value. + + We avoid usage of BASE_REGNUM by nonzero macro value because the + reload can decide not to use the hard register because some + constant was forced to be in memory. */ +#define IRA_HARD_REGNO_ADD_COST_MULTIPLIER(regno) \ + (regno == BASE_REGNUM ? 0.0 : 0.5) + /* Register -> class mapping. 
*/ extern const enum reg_class regclass_map[FIRST_PSEUDO_REGISTER]; #define REGNO_REG_CLASS(REGNO) (regclass_map[REGNO]) diff --git a/gcc/config/sh/sh.h b/gcc/config/sh/sh.h index 2305872903d..8af2f436169 100644 --- a/gcc/config/sh/sh.h +++ b/gcc/config/sh/sh.h @@ -1499,6 +1499,20 @@ enum reg_class extern enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER]; #define REGNO_REG_CLASS(REGNO) regno_reg_class[(REGNO)] +/* The following macro defines cover classes for Integrated Register + Allocator. Cover classes is a set of non-intersected register + classes covering all hard registers used for register allocation + purpose. Any move between two registers of a cover class should be + cheaper than load or store of the registers. The macro value is + array of register classes with LIM_REG_CLASSES used as the end + marker. */ + +#define IRA_COVER_CLASSES \ +{ \ + GENERAL_REGS, FP_REGS, PR_REGS, T_REGS, MAC_REGS, TARGET_REGS, \ + LIM_REG_CLASSES \ +} + /* When defined, the compiler allows registers explicitly used in the rtl to be used as spill registers but prevents the compiler from extending the lifetime of these registers. */ diff --git a/gcc/config/sh/sh.md b/gcc/config/sh/sh.md index 6300054220a..b6e56aa5a5e 100644 --- a/gcc/config/sh/sh.md +++ b/gcc/config/sh/sh.md @@ -1143,7 +1143,7 @@ (set (match_dup 4) (match_dup 5))] " { - rtx set1, set2; + rtx set1, set2, insn2; rtx replacements[4]; /* We want to replace occurrences of operands[0] with operands[1] and @@ -1173,7 +1173,10 @@ extract_insn (emit_insn (set1)); if (! constrain_operands (1)) goto failure; - extract_insn (emit (set2)); + insn2 = emit (set2); + if (GET_CODE (insn2) == BARRIER) + goto failure; + extract_insn (insn2); if (! 
constrain_operands (1)) { rtx tmp; diff --git a/gcc/config/sparc/sparc.h b/gcc/config/sparc/sparc.h index 4d180da8285..42894705361 100644 --- a/gcc/config/sparc/sparc.h +++ b/gcc/config/sparc/sparc.h @@ -1078,6 +1078,19 @@ enum reg_class { NO_REGS, FPCC_REGS, I64_REGS, GENERAL_REGS, FP_REGS, {-1, -1, -1, 0x20}, /* GENERAL_OR_EXTRA_FP_REGS */ \ {-1, -1, -1, 0x3f}} /* ALL_REGS */ +/* The following macro defines cover classes for Integrated Register + Allocator. Cover classes is a set of non-intersected register + classes covering all hard registers used for register allocation + purpose. Any move between two registers of a cover class should be + cheaper than load or store of the registers. The macro value is + array of register classes with LIM_REG_CLASSES used as the end + marker. */ + +#define IRA_COVER_CLASSES \ +{ \ + GENERAL_REGS, EXTRA_FP_REGS, FPCC_REGS, LIM_REG_CLASSES \ +} + /* Defines invalid mode changes. Borrowed from pa64-regs.h. SImode loads to floating-point registers are not zero-extended. diff --git a/gcc/config/spu/spu.h b/gcc/config/spu/spu.h index 0bd69d365cd..86042aacb2f 100644 --- a/gcc/config/spu/spu.h +++ b/gcc/config/spu/spu.h @@ -196,6 +196,9 @@ enum reg_class { LIM_REG_CLASSES }; +/* SPU is simple, it really only has one class of registers. */ +#define IRA_COVER_CLASSES { GENERAL_REGS, LIM_REG_CLASSES } + #define N_REG_CLASSES (int) LIM_REG_CLASSES #define REG_CLASS_NAMES \ diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 3789be6fdbd..0c4e6b4366e 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -274,7 +274,8 @@ Objective-C and Objective-C++ Dialects}. @xref{Debugging Options,,Options for Debugging Your Program or GCC}. 
@gccoptlist{-d@var{letters} -dumpspecs -dumpmachine -dumpversion @gol -fdbg-cnt-list -fdbg-cnt=@var{counter-value-list} @gol --fdump-noaddr -fdump-unnumbered -fdump-translation-unit@r{[}-@var{n}@r{]} @gol +-fdump-noaddr -fdump-unnumbered @gol +-fdump-translation-unit@r{[}-@var{n}@r{]} @gol -fdump-class-hierarchy@r{[}-@var{n}@r{]} @gol -fdump-ipa-all -fdump-ipa-cgraph -fdump-ipa-inline @gol -fdump-statistics @gol @@ -332,7 +333,10 @@ Objective-C and Objective-C++ Dialects}. -finline-functions -finline-functions-called-once -finline-limit=@var{n} @gol -finline-small-functions -fipa-cp -fipa-cp-clone -fipa-marix-reorg -fipa-pta @gol -fipa-pure-const -fipa-reference -fipa-struct-reorg @gol --fipa-type-escape -fivopts -fkeep-inline-functions -fkeep-static-consts @gol +-fipa-type-escape -fira -fira-algorithm=@var{algorithm} @gol +-fira-coalesce -fno-ira-share-save-slots @gol +-fno-ira-share-spill-slots -fira-verbose=@var{n} @gol +-fivopts -fkeep-inline-functions -fkeep-static-consts @gol -fmerge-all-constants -fmerge-constants -fmodulo-sched @gol -fmodulo-sched-allow-regmoves -fmove-loop-invariants -fmudflap @gol -fmudflapir -fmudflapth -fno-branch-count-reg -fno-default-inline @gol @@ -5673,6 +5677,49 @@ optimization. Enabled at levels @option{-O2}, @option{-O3}, @option{-Os}. +@item -fira +@opindex fira +Use the integrated register allocator (@acronym{IRA}) for register +allocation. It is a default if @acronym{IRA} has been ported for the +target. + +@item -fira-algorithm=@var{algorithm} +Use specified algorithm for the integrated register allocator. The +@var{algorithm} argument should be one of @code{regional}, @code{CB}, +or @code{mixed}. The second algorithm specifies Chaitin-Briggs +coloring, the first one specifies regional coloring based on +Chaitin-Briggs coloring, and the third one which is the default +specifies a mix of Chaitin-Briggs and regional algorithms where loops +with small register pressure are ignored. 
The first algorithm can +give best result for machines with small size and irregular register +set, the second one is faster and generates decent code and the +smallest size code, and the mixed algorithm usually give the best +results in most cases and for most architectures. + +@item -fira-coalesce +@opindex fira-coalesce +Do optimistic register coalescing. This option might be profitable for +architectures with big regular register files. + +@item -fno-ira-share-save-slots +@opindex fno-ira-share-save-slots +Switch off sharing stack slots used for saving call used hard +registers living through a call. Each hard register will get a +separate stack slot and as a result function stack frame will be +bigger. + +@item -fno-ira-share-spill-slots +@opindex fno-ira-share-spill-slots +Switch off sharing stack slots allocated for pseudo-registers. Each +pseudo-register which did not get a hard register will get a separate +stack slot and as a result function stack frame will be bigger. + +@item -fira-verbose=@var{n} +@opindex fira-verbose +Set up how verbose dump file for the integrated register allocator +will be. Default value is 5. If the value is greater or equal to 10, +the dump file will be stderr as if the value were @var{n} minus 10. + @item -fdelayed-branch @opindex fdelayed-branch If supported for the target machine, attempt to reorder instructions @@ -7384,6 +7431,13 @@ processing. If this limit is hit, SCCVN processing for the whole function will not be done and optimizations depending on it will be disabled. The default maximum SCC size is 10000. +@item ira-max-loops-num +IRA uses a regional register allocation by default. If a function +contains loops more than number given by the parameter, non-regional +register allocator will be used even when option +@option{-fira-algorithm} is given. The default value of the parameter +is 20. 
+ @end table @end table diff --git a/gcc/doc/passes.texi b/gcc/doc/passes.texi index daeaf9520e1..9004dd763ec 100644 --- a/gcc/doc/passes.texi +++ b/gcc/doc/passes.texi @@ -841,6 +841,28 @@ Global register allocation. This pass allocates hard registers for the remaining pseudo registers (those whose life spans are not contained in one basic block). The pass is located in @file{global.c}. +@item +The optional integrated register allocator (@acronym{IRA}). It can be +used instead of the local and global allocator. It is called +integrated because coalescing, register live range splitting, and hard +register preferencing are done on-the-fly during coloring. It also +has better integration with the reload pass. Pseudo-registers spilled +by the allocator or the reload have still a chance to get +hard-registers if the reload evicts some pseudo-registers from +hard-registers. The allocator helps to choose better pseudos for +spilling based on their live ranges and to coalesce stack slots +allocated for the spilled pseudo-registers. IRA is a regional +register allocator which is transformed into Chaitin-Briggs allocator +if there is one region. By default, IRA chooses regions using +register pressure but the user can force it to use one region or +regions corresponding to all loops. + +Source files of the allocator are @file{ira.c}, @file{ira-build.c}, +@file{ira-costs.c}, @file{ira-conflicts.c}, @file{ira-color.c}, +@file{ira-emit.c}, @file{ira-lives}, plus header files @file{ira.h} +and @file{ira-int.h} used for the communication between the allocator +and the rest of the compiler and between the IRA files. + @cindex reloading @item Reloading. This pass renumbers pseudo registers with the hardware diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi index 9b4a921883f..3087694a9cd 100644 --- a/gcc/doc/tm.texi +++ b/gcc/doc/tm.texi @@ -2026,6 +2026,18 @@ The macro body should not assume anything about the contents of On most machines, it is not necessary to define this macro. 
@end defmac +@defmac IRA_HARD_REGNO_ADD_COST_MULTIPLIER (@var{regno}) +In some case register allocation order is not enough for the +Integrated Register Allocator (@acronym{IRA}) to generate a good code. +If this macro is defined, it should return a floating point value +based on @var{regno}. The cost of using @var{regno} for a pseudo will +be increased by approximately the pseudo's usage frequency times the +value returned by this macro. Not defining this macro is equivalent +to having it always return @code{0.0}. + +On most machines, it is not necessary to define this macro. +@end defmac + @node Values in Registers @subsection How Values Fit in Registers @@ -2814,6 +2826,19 @@ as below: @end smallexample @end defmac +@defmac IRA_COVER_CLASSES +The macro defines cover classes for the Integrated Register Allocator +(@acronym{IRA}). Cover classes are a set of non-intersecting register +classes covering all hard registers used for register allocation +purposes. Any move between two registers in the same cover class +should be cheaper than load or store of the registers. The macro +value should be the initializer for an array of register class values, +with @code{LIM_REG_CLASSES} used as the end marker. + +You must define this macro in order to use the integrated register +allocator for the target. +@end defmac + @node Old Constraints @section Obsolete Macros for Defining Constraints @cindex defining constraints, obsolete method diff --git a/gcc/flags.h b/gcc/flags.h index 1de81f8e13b..1d645d9ebfa 100644 --- a/gcc/flags.h +++ b/gcc/flags.h @@ -205,6 +205,19 @@ extern int flag_debug_asm; extern int flag_next_runtime; extern int flag_dump_rtl_in_asm; + +/* The algorithm used for the integrated register allocator (IRA). */ +enum ira_algorithm +{ + IRA_ALGORITHM_REGIONAL, + IRA_ALGORITHM_CB, + IRA_ALGORITHM_MIXED +}; + +extern enum ira_algorithm flag_ira_algorithm; + +extern unsigned int flag_ira_verbose; + /* Other basic status info about current function. 
*/ diff --git a/gcc/global.c b/gcc/global.c index 690a80c8a85..e0783d5237c 100644 --- a/gcc/global.c +++ b/gcc/global.c @@ -188,7 +188,7 @@ compute_regs_asm_clobbered (char *regs_asm_clobbered) /* All registers that can be eliminated. */ -static HARD_REG_SET eliminable_regset; +HARD_REG_SET eliminable_regset; static int regno_compare (const void *, const void *); static int allocno_compare (const void *, const void *); @@ -197,7 +197,6 @@ static void prune_preferences (void); static void set_preferences (void); static void find_reg (int, HARD_REG_SET, int, int, int); static void dump_conflicts (FILE *); -static void build_insn_chain (void); /* Look through the list of eliminable registers. Set ELIM_SET to the @@ -1355,7 +1354,8 @@ mark_elimination (int from, int to) FOR_EACH_BB (bb) { - regset r = DF_LIVE_IN (bb); + /* We don't use LIVE info in IRA. */ + regset r = (flag_ira ? DF_LR_IN (bb) : DF_LIVE_IN (bb)); if (REGNO_REG_SET_P (r, from)) { CLEAR_REGNO_REG_SET (r, from); @@ -1385,11 +1385,21 @@ print_insn_chains (FILE *file) print_insn_chain (file, c); } +/* Return true if pseudo REGNO should be added to set live_throughout + or dead_or_set of the insn chains for reload consideration. */ + +static bool +pseudo_for_reload_consideration_p (int regno) +{ + /* Consider spilled pseudos too for IRA because they still have a + chance to get hard-registers in the reload when IRA is used. */ + return reg_renumber[regno] >= 0 || (flag_ira && optimize); +} /* Walk the insns of the current function and build reload_insn_chain, and record register life information. 
*/ -static void +void build_insn_chain (void) { unsigned int i; @@ -1412,7 +1422,6 @@ build_insn_chain (void) for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) if (TEST_HARD_REG_BIT (eliminable_regset, i)) bitmap_set_bit (elim_regset, i); - FOR_EACH_BB_REVERSE (bb) { bitmap_iterator bi; @@ -1430,7 +1439,7 @@ build_insn_chain (void) EXECUTE_IF_SET_IN_BITMAP (df_get_live_out (bb), FIRST_PSEUDO_REGISTER, i, bi) { - if (reg_renumber[i] >= 0) + if (pseudo_for_reload_consideration_p (i)) bitmap_set_bit (live_relevant_regs, i); } @@ -1467,11 +1476,13 @@ build_insn_chain (void) if (!fixed_regs[regno]) bitmap_set_bit (&c->dead_or_set, regno); } - else if (reg_renumber[regno] >= 0) + else if (pseudo_for_reload_consideration_p (regno)) bitmap_set_bit (&c->dead_or_set, regno); } - if ((regno < FIRST_PSEUDO_REGISTER || reg_renumber[regno] >= 0) + if ((regno < FIRST_PSEUDO_REGISTER + || reg_renumber[regno] >= 0 + || (flag_ira && optimize)) && (!DF_REF_FLAGS_IS_SET (def, DF_REF_CONDITIONAL))) { rtx reg = DF_REF_REG (def); @@ -1567,11 +1578,12 @@ build_insn_chain (void) if (!fixed_regs[regno]) bitmap_set_bit (&c->dead_or_set, regno); } - else if (reg_renumber[regno] >= 0) + else if (pseudo_for_reload_consideration_p (regno)) bitmap_set_bit (&c->dead_or_set, regno); } - if (regno < FIRST_PSEUDO_REGISTER || reg_renumber[regno] >= 0) + if (regno < FIRST_PSEUDO_REGISTER + || pseudo_for_reload_consideration_p (regno)) { if (GET_CODE (reg) == SUBREG && !DF_REF_FLAGS_IS_SET (use, @@ -1748,6 +1760,13 @@ dump_global_regs (FILE *file) fprintf (file, "\n\n"); } + +static bool +gate_handle_global_alloc (void) +{ + return ! flag_ira; +} + /* Run old register allocator. Return TRUE if we must exit rest_of_compilation upon return. 
*/ static unsigned int @@ -1811,7 +1830,7 @@ struct rtl_opt_pass pass_global_alloc = { RTL_PASS, "greg", /* name */ - NULL, /* gate */ + gate_handle_global_alloc, /* gate */ rest_of_handle_global_alloc, /* execute */ NULL, /* sub */ NULL, /* next */ diff --git a/gcc/hard-reg-set.h b/gcc/hard-reg-set.h index 21030fdd2df..c4f74a267c0 100644 --- a/gcc/hard-reg-set.h +++ b/gcc/hard-reg-set.h @@ -538,6 +538,11 @@ extern char global_regs[FIRST_PSEUDO_REGISTER]; extern HARD_REG_SET regs_invalidated_by_call; +/* Call used hard registers which can not be saved because there is no + insn for this. */ + +extern HARD_REG_SET no_caller_save_reg_set; + #ifdef REG_ALLOC_ORDER /* Table of register numbers in the order in which to try to use them. */ @@ -556,6 +561,10 @@ extern HARD_REG_SET reg_class_contents[N_REG_CLASSES]; extern unsigned int reg_class_size[N_REG_CLASSES]; +/* For each reg class, table listing all the classes contained in it. */ + +extern enum reg_class reg_class_subclasses[N_REG_CLASSES][N_REG_CLASSES]; + /* For each pair of reg classes, a largest reg class contained in their union. */ diff --git a/gcc/ira-build.c b/gcc/ira-build.c new file mode 100644 index 00000000000..979a3c86685 --- /dev/null +++ b/gcc/ira-build.c @@ -0,0 +1,2449 @@ +/* Building internal representation for IRA. + Copyright (C) 2006, 2007, 2008 + Free Software Foundation, Inc. + Contributed by Vladimir Makarov <vmakarov@redhat.com>. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. 
+ +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "rtl.h" +#include "tm_p.h" +#include "target.h" +#include "regs.h" +#include "flags.h" +#include "hard-reg-set.h" +#include "basic-block.h" +#include "insn-config.h" +#include "recog.h" +#include "toplev.h" +#include "params.h" +#include "df.h" +#include "output.h" +#include "reload.h" +#include "sparseset.h" +#include "ira-int.h" + +static ira_copy_t find_allocno_copy (ira_allocno_t, ira_allocno_t, rtx, + ira_loop_tree_node_t); + +/* The root of the loop tree corresponding to the all function. */ +ira_loop_tree_node_t ira_loop_tree_root; + +/* Height of the loop tree. */ +int ira_loop_tree_height; + +/* All nodes representing basic blocks are referred through the + following array. We can not use basic block member `aux' for this + because it is used for insertion of insns on edges. */ +ira_loop_tree_node_t ira_bb_nodes; + +/* All nodes representing loops are referred through the following + array. */ +ira_loop_tree_node_t ira_loop_nodes; + +/* Map regno -> allocnos with given regno (see comments for + allocno member `next_regno_allocno'). */ +ira_allocno_t *ira_regno_allocno_map; + +/* Array of references to all allocnos. The order number of the + allocno corresponds to the index in the array. Removed allocnos + have NULL element value. */ +ira_allocno_t *ira_allocnos; + +/* Sizes of the previous array. */ +int ira_allocnos_num; + +/* Map conflict id -> allocno with given conflict id (see comments for + allocno member `conflict_id'). */ +ira_allocno_t *ira_conflict_id_allocno_map; + +/* Array of references to all copies. The order number of the copy + corresponds to the index in the array. Removed copies have NULL + element value. */ +ira_copy_t *ira_copies; + +/* Size of the previous array. 
*/ +int ira_copies_num; + + + +/* LAST_BASIC_BLOCK before generating additional insns because of live + range splitting. Emitting insns on a critical edge creates a new + basic block. */ +static int last_basic_block_before_change; + +/* The following function allocates the loop tree nodes. If LOOPS_P + is FALSE, the nodes corresponding to the loops (except the root + which corresponds the all function) will be not allocated but nodes + will still be allocated for basic blocks. */ +static void +create_loop_tree_nodes (bool loops_p) +{ + unsigned int i, j; + int max_regno; + bool skip_p; + edge_iterator ei; + edge e; + VEC (edge, heap) *edges; + loop_p loop; + + ira_bb_nodes + = ((struct ira_loop_tree_node *) + ira_allocate (sizeof (struct ira_loop_tree_node) * last_basic_block)); + last_basic_block_before_change = last_basic_block; + for (i = 0; i < (unsigned int) last_basic_block; i++) + { + ira_bb_nodes[i].regno_allocno_map = NULL; + memset (ira_bb_nodes[i].reg_pressure, 0, + sizeof (ira_bb_nodes[i].reg_pressure)); + ira_bb_nodes[i].mentioned_allocnos = NULL; + ira_bb_nodes[i].modified_regnos = NULL; + ira_bb_nodes[i].border_allocnos = NULL; + ira_bb_nodes[i].local_copies = NULL; + } + ira_loop_nodes = ((struct ira_loop_tree_node *) + ira_allocate (sizeof (struct ira_loop_tree_node) + * VEC_length (loop_p, ira_loops.larray))); + max_regno = max_reg_num (); + for (i = 0; VEC_iterate (loop_p, ira_loops.larray, i, loop); i++) + { + if (loop != ira_loops.tree_root) + { + ira_loop_nodes[i].regno_allocno_map = NULL; + if (! 
loops_p) + continue; + skip_p = false; + FOR_EACH_EDGE (e, ei, loop->header->preds) + if (e->src != loop->latch + && (e->flags & EDGE_ABNORMAL) && EDGE_CRITICAL_P (e)) + { + skip_p = true; + break; + } + if (skip_p) + continue; + edges = get_loop_exit_edges (loop); + for (j = 0; VEC_iterate (edge, edges, j, e); j++) + if ((e->flags & EDGE_ABNORMAL) && EDGE_CRITICAL_P (e)) + { + skip_p = true; + break; + } + VEC_free (edge, heap, edges); + if (skip_p) + continue; + } + ira_loop_nodes[i].regno_allocno_map + = (ira_allocno_t *) ira_allocate (sizeof (ira_allocno_t) * max_regno); + memset (ira_loop_nodes[i].regno_allocno_map, 0, + sizeof (ira_allocno_t) * max_regno); + memset (ira_loop_nodes[i].reg_pressure, 0, + sizeof (ira_loop_nodes[i].reg_pressure)); + ira_loop_nodes[i].mentioned_allocnos = ira_allocate_bitmap (); + ira_loop_nodes[i].modified_regnos = ira_allocate_bitmap (); + ira_loop_nodes[i].border_allocnos = ira_allocate_bitmap (); + ira_loop_nodes[i].local_copies = ira_allocate_bitmap (); + } +} + +/* The function returns TRUE if there are more one allocation + region. */ +static bool +more_one_region_p (void) +{ + unsigned int i; + loop_p loop; + + for (i = 0; VEC_iterate (loop_p, ira_loops.larray, i, loop); i++) + if (ira_loop_nodes[i].regno_allocno_map != NULL + && ira_loop_tree_root != &ira_loop_nodes[i]) + return true; + return false; +} + +/* Free the loop tree node of a loop. */ +static void +finish_loop_tree_node (ira_loop_tree_node_t loop) +{ + if (loop->regno_allocno_map != NULL) + { + ira_assert (loop->bb == NULL); + ira_free_bitmap (loop->local_copies); + ira_free_bitmap (loop->border_allocnos); + ira_free_bitmap (loop->modified_regnos); + ira_free_bitmap (loop->mentioned_allocnos); + ira_free (loop->regno_allocno_map); + loop->regno_allocno_map = NULL; + } +} + +/* Free the loop tree nodes. 
*/ +static void +finish_loop_tree_nodes (void) +{ + unsigned int i; + loop_p loop; + + for (i = 0; VEC_iterate (loop_p, ira_loops.larray, i, loop); i++) + finish_loop_tree_node (&ira_loop_nodes[i]); + ira_free (ira_loop_nodes); + for (i = 0; i < (unsigned int) last_basic_block_before_change; i++) + { + if (ira_bb_nodes[i].local_copies != NULL) + ira_free_bitmap (ira_bb_nodes[i].local_copies); + if (ira_bb_nodes[i].border_allocnos != NULL) + ira_free_bitmap (ira_bb_nodes[i].border_allocnos); + if (ira_bb_nodes[i].modified_regnos != NULL) + ira_free_bitmap (ira_bb_nodes[i].modified_regnos); + if (ira_bb_nodes[i].mentioned_allocnos != NULL) + ira_free_bitmap (ira_bb_nodes[i].mentioned_allocnos); + if (ira_bb_nodes[i].regno_allocno_map != NULL) + ira_free (ira_bb_nodes[i].regno_allocno_map); + } + ira_free (ira_bb_nodes); +} + + + +/* The following recursive function adds LOOP to the loop tree + hierarchy. LOOP is added only once. */ +static void +add_loop_to_tree (struct loop *loop) +{ + struct loop *parent; + ira_loop_tree_node_t loop_node, parent_node; + + /* We can not use loop node access macros here because of potential + checking and because the nodes are not initialized enough + yet. */ + if (loop_outer (loop) != NULL) + add_loop_to_tree (loop_outer (loop)); + if (ira_loop_nodes[loop->num].regno_allocno_map != NULL + && ira_loop_nodes[loop->num].children == NULL) + { + /* We have not added loop node to the tree yet. 
*/ + loop_node = &ira_loop_nodes[loop->num]; + loop_node->loop = loop; + loop_node->bb = NULL; + for (parent = loop_outer (loop); + parent != NULL; + parent = loop_outer (parent)) + if (ira_loop_nodes[parent->num].regno_allocno_map != NULL) + break; + if (parent == NULL) + { + loop_node->next = NULL; + loop_node->subloop_next = NULL; + loop_node->parent = NULL; + } + else + { + parent_node = &ira_loop_nodes[parent->num]; + loop_node->next = parent_node->children; + parent_node->children = loop_node; + loop_node->subloop_next = parent_node->subloops; + parent_node->subloops = loop_node; + loop_node->parent = parent_node; + } + } +} + +/* The following recursive function sets up levels of nodes of the + tree given its root LOOP_NODE. The enumeration starts with LEVEL. + The function returns maximal value of level in the tree + 1. */ +static int +setup_loop_tree_level (ira_loop_tree_node_t loop_node, int level) +{ + int height, max_height; + ira_loop_tree_node_t subloop_node; + + ira_assert (loop_node->bb == NULL); + loop_node->level = level; + max_height = level + 1; + for (subloop_node = loop_node->subloops; + subloop_node != NULL; + subloop_node = subloop_node->subloop_next) + { + ira_assert (subloop_node->bb == NULL); + height = setup_loop_tree_level (subloop_node, level + 1); + if (height > max_height) + max_height = height; + } + return max_height; +} + +/* Create the loop tree. The algorithm is designed to provide correct + order of loops (they are ordered by their last loop BB) and basic + blocks in the chain formed by member next. */ +static void +form_loop_tree (void) +{ + unsigned int i; + basic_block bb; + struct loop *parent; + ira_loop_tree_node_t bb_node, loop_node; + loop_p loop; + + /* We can not use loop/bb node access macros because of potential + checking and because the nodes are not initialized enough + yet. 
*/ + for (i = 0; VEC_iterate (loop_p, ira_loops.larray, i, loop); i++) + if (ira_loop_nodes[i].regno_allocno_map != NULL) + { + ira_loop_nodes[i].children = NULL; + ira_loop_nodes[i].subloops = NULL; + } + FOR_EACH_BB_REVERSE (bb) + { + bb_node = &ira_bb_nodes[bb->index]; + bb_node->bb = bb; + bb_node->loop = NULL; + bb_node->subloops = NULL; + bb_node->children = NULL; + bb_node->subloop_next = NULL; + bb_node->next = NULL; + for (parent = bb->loop_father; + parent != NULL; + parent = loop_outer (parent)) + if (ira_loop_nodes[parent->num].regno_allocno_map != NULL) + break; + add_loop_to_tree (parent); + loop_node = &ira_loop_nodes[parent->num]; + bb_node->next = loop_node->children; + bb_node->parent = loop_node; + loop_node->children = bb_node; + } + ira_loop_tree_root = IRA_LOOP_NODE_BY_INDEX (ira_loops.tree_root->num); + ira_loop_tree_height = setup_loop_tree_level (ira_loop_tree_root, 0); + ira_assert (ira_loop_tree_root->regno_allocno_map != NULL); +} + + + +/* Rebuild IRA_REGNO_ALLOCNO_MAP and REGNO_ALLOCNO_MAPs of the loop + tree nodes. */ +static void +rebuild_regno_allocno_maps (void) +{ + unsigned int l; + int max_regno, regno; + ira_allocno_t a; + ira_loop_tree_node_t loop_tree_node; + loop_p loop; + ira_allocno_iterator ai; + + max_regno = max_reg_num (); + for (l = 0; VEC_iterate (loop_p, ira_loops.larray, l, loop); l++) + if (ira_loop_nodes[l].regno_allocno_map != NULL) + { + ira_free (ira_loop_nodes[l].regno_allocno_map); + ira_loop_nodes[l].regno_allocno_map + = (ira_allocno_t *) ira_allocate (sizeof (ira_allocno_t) + * max_regno); + memset (ira_loop_nodes[l].regno_allocno_map, 0, + sizeof (ira_allocno_t) * max_regno); + } + ira_free (ira_regno_allocno_map); + ira_regno_allocno_map + = (ira_allocno_t *) ira_allocate (max_regno * sizeof (ira_allocno_t)); + memset (ira_regno_allocno_map, 0, max_regno * sizeof (ira_allocno_t)); + FOR_EACH_ALLOCNO (a, ai) + { + if (ALLOCNO_CAP_MEMBER (a) != NULL) + /* Caps are not in the regno allocno maps. 
*/ + continue; + regno = ALLOCNO_REGNO (a); + loop_tree_node = ALLOCNO_LOOP_TREE_NODE (a); + ALLOCNO_NEXT_REGNO_ALLOCNO (a) = ira_regno_allocno_map[regno]; + ira_regno_allocno_map[regno] = a; + if (loop_tree_node->regno_allocno_map[regno] == NULL) + /* Remember that we can create temporary allocnos to break + cycles in register shuffle. */ + loop_tree_node->regno_allocno_map[regno] = a; + } +} + + + +/* Pools for allocnos and allocno live ranges. */ +static alloc_pool allocno_pool, allocno_live_range_pool; + +/* Vec containing references to all created allocnos. It is a + container of array allocnos. */ +static VEC(ira_allocno_t,heap) *allocno_vec; + +/* Vec containing references to all created allocnos. It is a + container of ira_conflict_id_allocno_map. */ +static VEC(ira_allocno_t,heap) *ira_conflict_id_allocno_map_vec; + +/* Initialize data concerning allocnos. */ +static void +initiate_allocnos (void) +{ + allocno_live_range_pool + = create_alloc_pool ("allocno live ranges", + sizeof (struct ira_allocno_live_range), 100); + allocno_pool + = create_alloc_pool ("allocnos", sizeof (struct ira_allocno), 100); + allocno_vec = VEC_alloc (ira_allocno_t, heap, max_reg_num () * 2); + ira_allocnos = NULL; + ira_allocnos_num = 0; + ira_conflict_id_allocno_map_vec + = VEC_alloc (ira_allocno_t, heap, max_reg_num () * 2); + ira_conflict_id_allocno_map = NULL; + ira_regno_allocno_map + = (ira_allocno_t *) ira_allocate (max_reg_num () * sizeof (ira_allocno_t)); + memset (ira_regno_allocno_map, 0, max_reg_num () * sizeof (ira_allocno_t)); +} + +/* Create and return the allocno corresponding to REGNO in + LOOP_TREE_NODE. Add the allocno to the list of allocnos with the + same regno if CAP_P is FALSE. */ +ira_allocno_t +ira_create_allocno (int regno, bool cap_p, ira_loop_tree_node_t loop_tree_node) +{ + ira_allocno_t a; + + a = (ira_allocno_t) pool_alloc (allocno_pool); + ALLOCNO_REGNO (a) = regno; + ALLOCNO_LOOP_TREE_NODE (a) = loop_tree_node; + if (! 
cap_p)
+ {
+ /* Link the new allocno into the chain of allocnos with the same
+ regno and record it in the loop tree node's regno map. */
+ ALLOCNO_NEXT_REGNO_ALLOCNO (a) = ira_regno_allocno_map[regno];
+ ira_regno_allocno_map[regno] = a;
+ if (loop_tree_node->regno_allocno_map[regno] == NULL)
+ /* Remember that we can create temporary allocnos to break
+ cycles in register shuffle on region borders (see
+ ira-emit.c). */
+ loop_tree_node->regno_allocno_map[regno] = a;
+ }
+ /* Cap links are filled in later by create_cap_allocno. */
+ ALLOCNO_CAP (a) = NULL;
+ ALLOCNO_CAP_MEMBER (a) = NULL;
+ ALLOCNO_NUM (a) = ira_allocnos_num;
+ ALLOCNO_CONFLICT_ALLOCNO_ARRAY (a) = NULL;
+ ALLOCNO_CONFLICT_ALLOCNOS_NUM (a) = 0;
+ /* Start the conflict sets off with the registers that can never be
+ allocated. */
+ COPY_HARD_REG_SET (ALLOCNO_CONFLICT_HARD_REGS (a), ira_no_alloc_regs);
+ COPY_HARD_REG_SET (ALLOCNO_TOTAL_CONFLICT_HARD_REGS (a), ira_no_alloc_regs);
+ ALLOCNO_NREFS (a) = 0;
+ ALLOCNO_FREQ (a) = 1;
+ /* No hard register is assigned initially. */
+ ALLOCNO_HARD_REGNO (a) = -1;
+ ALLOCNO_CALL_FREQ (a) = 0;
+ ALLOCNO_CALLS_CROSSED_NUM (a) = 0;
+#ifdef STACK_REGS
+ ALLOCNO_NO_STACK_REG_P (a) = false;
+ ALLOCNO_TOTAL_NO_STACK_REG_P (a) = false;
+#endif
+ ALLOCNO_MEM_OPTIMIZED_DEST (a) = NULL;
+ ALLOCNO_MEM_OPTIMIZED_DEST_P (a) = false;
+ ALLOCNO_SOMEWHERE_RENAMED_P (a) = false;
+ ALLOCNO_CHILD_RENAMED_P (a) = false;
+ ALLOCNO_DONT_REASSIGN_P (a) = false;
+ ALLOCNO_IN_GRAPH_P (a) = false;
+ ALLOCNO_ASSIGNED_P (a) = false;
+ ALLOCNO_MAY_BE_SPILLED_P (a) = false;
+ ALLOCNO_SPLAY_REMOVED_P (a) = false;
+ ALLOCNO_CONFLICT_VEC_P (a) = false;
+ /* A negative REGNO has no corresponding pseudo, hence no mode. */
+ ALLOCNO_MODE (a) = (regno < 0 ?
VOIDmode : PSEUDO_REGNO_MODE (regno)); + ALLOCNO_COPIES (a) = NULL; + ALLOCNO_HARD_REG_COSTS (a) = NULL; + ALLOCNO_CONFLICT_HARD_REG_COSTS (a) = NULL; + ALLOCNO_UPDATED_HARD_REG_COSTS (a) = NULL; + ALLOCNO_UPDATED_CONFLICT_HARD_REG_COSTS (a) = NULL; + ALLOCNO_LEFT_CONFLICTS_NUM (a) = -1; + ALLOCNO_COVER_CLASS (a) = NO_REGS; + ALLOCNO_COVER_CLASS_COST (a) = 0; + ALLOCNO_MEMORY_COST (a) = 0; + ALLOCNO_UPDATED_MEMORY_COST (a) = 0; + ALLOCNO_EXCESS_PRESSURE_POINTS_NUM (a) = 0; + ALLOCNO_NEXT_BUCKET_ALLOCNO (a) = NULL; + ALLOCNO_PREV_BUCKET_ALLOCNO (a) = NULL; + ALLOCNO_FIRST_COALESCED_ALLOCNO (a) = a; + ALLOCNO_NEXT_COALESCED_ALLOCNO (a) = a; + ALLOCNO_LIVE_RANGES (a) = NULL; + ALLOCNO_MIN (a) = INT_MAX; + ALLOCNO_MAX (a) = -1; + ALLOCNO_CONFLICT_ID (a) = ira_allocnos_num; + VEC_safe_push (ira_allocno_t, heap, allocno_vec, a); + ira_allocnos = VEC_address (ira_allocno_t, allocno_vec); + ira_allocnos_num = VEC_length (ira_allocno_t, allocno_vec); + VEC_safe_push (ira_allocno_t, heap, ira_conflict_id_allocno_map_vec, a); + ira_conflict_id_allocno_map + = VEC_address (ira_allocno_t, ira_conflict_id_allocno_map_vec); + return a; +} + +/* Set up cover class for A and update its conflict hard registers. */ +void +ira_set_allocno_cover_class (ira_allocno_t a, enum reg_class cover_class) +{ + ALLOCNO_COVER_CLASS (a) = cover_class; + IOR_COMPL_HARD_REG_SET (ALLOCNO_CONFLICT_HARD_REGS (a), + reg_class_contents[cover_class]); + IOR_COMPL_HARD_REG_SET (ALLOCNO_TOTAL_CONFLICT_HARD_REGS (a), + reg_class_contents[cover_class]); +} + +/* Return TRUE if the conflict vector with NUM elements is more + profitable than conflict bit vector for A. */ +bool +ira_conflict_vector_profitable_p (ira_allocno_t a, int num) +{ + int nw; + + if (ALLOCNO_MAX (a) < ALLOCNO_MIN (a)) + /* We prefer bit vector in such case because it does not result in + allocation. 
*/ + return false; + + nw = (ALLOCNO_MAX (a) - ALLOCNO_MIN (a) + IRA_INT_BITS) / IRA_INT_BITS; + return (2 * sizeof (ira_allocno_t) * (num + 1) + < 3 * nw * sizeof (IRA_INT_TYPE)); +} + +/* Allocates and initialize the conflict vector of A for NUM + conflicting allocnos. */ +void +ira_allocate_allocno_conflict_vec (ira_allocno_t a, int num) +{ + int size; + ira_allocno_t *vec; + + ira_assert (ALLOCNO_CONFLICT_ALLOCNO_ARRAY (a) == NULL); + num++; /* for NULL end marker */ + size = sizeof (ira_allocno_t) * num; + ALLOCNO_CONFLICT_ALLOCNO_ARRAY (a) = ira_allocate (size); + vec = (ira_allocno_t *) ALLOCNO_CONFLICT_ALLOCNO_ARRAY (a); + vec[0] = NULL; + ALLOCNO_CONFLICT_ALLOCNOS_NUM (a) = 0; + ALLOCNO_CONFLICT_ALLOCNO_ARRAY_SIZE (a) = size; + ALLOCNO_CONFLICT_VEC_P (a) = true; +} + +/* Allocate and initialize the conflict bit vector of A. */ +static void +allocate_allocno_conflict_bit_vec (ira_allocno_t a) +{ + unsigned int size; + + ira_assert (ALLOCNO_CONFLICT_ALLOCNO_ARRAY (a) == NULL); + size = ((ALLOCNO_MAX (a) - ALLOCNO_MIN (a) + IRA_INT_BITS) + / IRA_INT_BITS * sizeof (IRA_INT_TYPE)); + ALLOCNO_CONFLICT_ALLOCNO_ARRAY (a) = ira_allocate (size); + memset (ALLOCNO_CONFLICT_ALLOCNO_ARRAY (a), 0, size); + ALLOCNO_CONFLICT_ALLOCNO_ARRAY_SIZE (a) = size; + ALLOCNO_CONFLICT_VEC_P (a) = false; +} + +/* Allocate and initialize the conflict vector or conflict bit vector + of A for NUM conflicting allocnos whatever is more profitable. */ +void +ira_allocate_allocno_conflicts (ira_allocno_t a, int num) +{ + if (ira_conflict_vector_profitable_p (a, num)) + ira_allocate_allocno_conflict_vec (a, num); + else + allocate_allocno_conflict_bit_vec (a); +} + +/* Add A2 to the conflicts of A1. 
*/
+static void
+add_to_allocno_conflicts (ira_allocno_t a1, ira_allocno_t a2)
+{
+ int num;
+ unsigned int size;
+
+ if (ALLOCNO_CONFLICT_VEC_P (a1))
+ {
+ ira_allocno_t *vec;
+
+ /* +2: one slot for A2 and one for the NULL end marker. */
+ num = ALLOCNO_CONFLICT_ALLOCNOS_NUM (a1) + 2;
+ if (ALLOCNO_CONFLICT_ALLOCNO_ARRAY_SIZE (a1)
+ >= num * sizeof (ira_allocno_t))
+ vec = (ira_allocno_t *) ALLOCNO_CONFLICT_ALLOCNO_ARRAY (a1);
+ else
+ {
+ /* Grow by a 3/2 factor to amortize future insertions. */
+ size = (3 * num / 2 + 1) * sizeof (ira_allocno_t);
+ vec = (ira_allocno_t *) ira_allocate (size);
+ memcpy (vec, ALLOCNO_CONFLICT_ALLOCNO_ARRAY (a1),
+ sizeof (ira_allocno_t) * ALLOCNO_CONFLICT_ALLOCNOS_NUM (a1));
+ ira_free (ALLOCNO_CONFLICT_ALLOCNO_ARRAY (a1));
+ ALLOCNO_CONFLICT_ALLOCNO_ARRAY (a1) = vec;
+ ALLOCNO_CONFLICT_ALLOCNO_ARRAY_SIZE (a1) = size;
+ }
+ vec[num - 2] = a2;
+ vec[num - 1] = NULL;
+ ALLOCNO_CONFLICT_ALLOCNOS_NUM (a1)++;
+ }
+ else
+ {
+ int nw, added_head_nw, id;
+ IRA_INT_TYPE *vec;
+
+ /* The bit vector covers conflict ids in the range
+ [ALLOCNO_MIN (a1), ALLOCNO_MAX (a1)]; if ID falls outside,
+ the vector must be grown at the head or the tail. */
+ id = ALLOCNO_CONFLICT_ID (a2);
+ vec = (IRA_INT_TYPE *) ALLOCNO_CONFLICT_ALLOCNO_ARRAY (a1);
+ if (ALLOCNO_MIN (a1) > id)
+ {
+ /* Expand head of the bit vector.
*/ + added_head_nw = (ALLOCNO_MIN (a1) - id - 1) / IRA_INT_BITS + 1; + nw = (ALLOCNO_MAX (a1) - ALLOCNO_MIN (a1)) / IRA_INT_BITS + 1; + size = (nw + added_head_nw) * sizeof (IRA_INT_TYPE); + if (ALLOCNO_CONFLICT_ALLOCNO_ARRAY_SIZE (a1) >= size) + { + memmove ((char *) vec + added_head_nw * sizeof (IRA_INT_TYPE), + vec, nw * sizeof (IRA_INT_TYPE)); + memset (vec, 0, added_head_nw * sizeof (IRA_INT_TYPE)); + } + else + { + size + = (3 * (nw + added_head_nw) / 2 + 1) * sizeof (IRA_INT_TYPE); + vec = (IRA_INT_TYPE *) ira_allocate (size); + memcpy ((char *) vec + added_head_nw * sizeof (IRA_INT_TYPE), + ALLOCNO_CONFLICT_ALLOCNO_ARRAY (a1), + nw * sizeof (IRA_INT_TYPE)); + memset (vec, 0, added_head_nw * sizeof (IRA_INT_TYPE)); + memset ((char *) vec + + (nw + added_head_nw) * sizeof (IRA_INT_TYPE), + 0, size - (nw + added_head_nw) * sizeof (IRA_INT_TYPE)); + ira_free (ALLOCNO_CONFLICT_ALLOCNO_ARRAY (a1)); + ALLOCNO_CONFLICT_ALLOCNO_ARRAY (a1) = vec; + ALLOCNO_CONFLICT_ALLOCNO_ARRAY_SIZE (a1) = size; + } + ALLOCNO_MIN (a1) -= added_head_nw * IRA_INT_BITS; + } + else if (ALLOCNO_MAX (a1) < id) + { + nw = (id - ALLOCNO_MIN (a1)) / IRA_INT_BITS + 1; + size = nw * sizeof (IRA_INT_TYPE); + if (ALLOCNO_CONFLICT_ALLOCNO_ARRAY_SIZE (a1) < size) + { + /* Expand tail of the bit vector. */ + size = (3 * nw / 2 + 1) * sizeof (IRA_INT_TYPE); + vec = (IRA_INT_TYPE *) ira_allocate (size); + memcpy (vec, ALLOCNO_CONFLICT_ALLOCNO_ARRAY (a1), + ALLOCNO_CONFLICT_ALLOCNO_ARRAY_SIZE (a1)); + memset ((char *) vec + ALLOCNO_CONFLICT_ALLOCNO_ARRAY_SIZE (a1), + 0, size - ALLOCNO_CONFLICT_ALLOCNO_ARRAY_SIZE (a1)); + ira_free (ALLOCNO_CONFLICT_ALLOCNO_ARRAY (a1)); + ALLOCNO_CONFLICT_ALLOCNO_ARRAY (a1) = vec; + ALLOCNO_CONFLICT_ALLOCNO_ARRAY_SIZE (a1) = size; + } + ALLOCNO_MAX (a1) = id; + } + SET_ALLOCNO_SET_BIT (vec, id, ALLOCNO_MIN (a1), ALLOCNO_MAX (a1)); + } +} + +/* Add A1 to the conflicts of A2 and vise versa. 
*/ +void +ira_add_allocno_conflict (ira_allocno_t a1, ira_allocno_t a2) +{ + add_to_allocno_conflicts (a1, a2); + add_to_allocno_conflicts (a2, a1); +} + +/* Clear all conflicts of allocno A. */ +static void +clear_allocno_conflicts (ira_allocno_t a) +{ + if (ALLOCNO_CONFLICT_VEC_P (a)) + { + ALLOCNO_CONFLICT_ALLOCNOS_NUM (a) = 0; + ((ira_allocno_t *) ALLOCNO_CONFLICT_ALLOCNO_ARRAY (a))[0] = NULL; + } + else if (ALLOCNO_CONFLICT_ALLOCNO_ARRAY_SIZE (a) != 0) + { + int nw; + + nw = (ALLOCNO_MAX (a) - ALLOCNO_MIN (a)) / IRA_INT_BITS + 1; + memset (ALLOCNO_CONFLICT_ALLOCNO_ARRAY (a), 0, + nw * sizeof (IRA_INT_TYPE)); + } +} + +/* The array used to find duplications in conflict vectors of + allocnos. */ +static int *allocno_conflict_check; + +/* The value used to mark allocation presence in conflict vector of + the current allocno. */ +static int curr_allocno_conflict_check_tick; + +/* Remove duplications in conflict vector of A. */ +static void +compress_allocno_conflict_vec (ira_allocno_t a) +{ + ira_allocno_t *vec, conflict_a; + int i, j; + + ira_assert (ALLOCNO_CONFLICT_VEC_P (a)); + vec = (ira_allocno_t *) ALLOCNO_CONFLICT_ALLOCNO_ARRAY (a); + curr_allocno_conflict_check_tick++; + for (i = j = 0; (conflict_a = vec[i]) != NULL; i++) + { + if (allocno_conflict_check[ALLOCNO_NUM (conflict_a)] + != curr_allocno_conflict_check_tick) + { + allocno_conflict_check[ALLOCNO_NUM (conflict_a)] + = curr_allocno_conflict_check_tick; + vec[j++] = conflict_a; + } + } + ALLOCNO_CONFLICT_ALLOCNOS_NUM (a) = j; + vec[j] = NULL; +} + +/* Remove duplications in conflict vectors of all allocnos. 
*/
+static void
+compress_conflict_vecs (void)
+{
+ ira_allocno_t a;
+ ira_allocno_iterator ai;
+
+ /* One mark slot per allocno; used together with the tick counter to
+ detect duplicates without clearing the array between allocnos. */
+ allocno_conflict_check
+ = (int *) ira_allocate (sizeof (int) * ira_allocnos_num);
+ memset (allocno_conflict_check, 0, sizeof (int) * ira_allocnos_num);
+ curr_allocno_conflict_check_tick = 0;
+ /* Only conflict vectors can contain duplicates; bit vectors are
+ sets by construction. */
+ FOR_EACH_ALLOCNO (a, ai)
+ if (ALLOCNO_CONFLICT_VEC_P (a))
+ compress_allocno_conflict_vec (a);
+ ira_free (allocno_conflict_check);
+}
+
+/* This recursive function outputs allocno A and if it is a cap the
+ function outputs its members. Output goes to IRA_DUMP_FILE. */
+void
+ira_print_expanded_allocno (ira_allocno_t a)
+{
+ basic_block bb;
+
+ fprintf (ira_dump_file, " a%d(r%d", ALLOCNO_NUM (a), ALLOCNO_REGNO (a));
+ if ((bb = ALLOCNO_LOOP_TREE_NODE (a)->bb) != NULL)
+ fprintf (ira_dump_file, ",b%d", bb->index);
+ else
+ fprintf (ira_dump_file, ",l%d", ALLOCNO_LOOP_TREE_NODE (a)->loop->num);
+ if (ALLOCNO_CAP_MEMBER (a) != NULL)
+ {
+ fprintf (ira_dump_file, ":");
+ ira_print_expanded_allocno (ALLOCNO_CAP_MEMBER (a));
+ }
+ fprintf (ira_dump_file, ")");
+}
+
+/* Create and return the cap representing allocno A in the
+ parent loop.
*/ +static ira_allocno_t +create_cap_allocno (ira_allocno_t a) +{ + ira_allocno_t cap; + ira_loop_tree_node_t parent; + enum reg_class cover_class; + + ira_assert (ALLOCNO_FIRST_COALESCED_ALLOCNO (a) == a + && ALLOCNO_NEXT_COALESCED_ALLOCNO (a) == a); + parent = ALLOCNO_LOOP_TREE_NODE (a)->parent; + cap = ira_create_allocno (ALLOCNO_REGNO (a), true, parent); + ALLOCNO_MODE (cap) = ALLOCNO_MODE (a); + cover_class = ALLOCNO_COVER_CLASS (a); + ira_set_allocno_cover_class (cap, cover_class); + ALLOCNO_AVAILABLE_REGS_NUM (cap) = ALLOCNO_AVAILABLE_REGS_NUM (a); + ALLOCNO_CAP_MEMBER (cap) = a; + bitmap_set_bit (parent->mentioned_allocnos, ALLOCNO_NUM (cap)); + ALLOCNO_CAP (a) = cap; + ALLOCNO_COVER_CLASS_COST (cap) = ALLOCNO_COVER_CLASS_COST (a); + ALLOCNO_MEMORY_COST (cap) = ALLOCNO_MEMORY_COST (a); + ALLOCNO_UPDATED_MEMORY_COST (cap) = ALLOCNO_UPDATED_MEMORY_COST (a); + ira_allocate_and_copy_costs + (&ALLOCNO_HARD_REG_COSTS (cap), cover_class, ALLOCNO_HARD_REG_COSTS (a)); + ira_allocate_and_copy_costs + (&ALLOCNO_CONFLICT_HARD_REG_COSTS (cap), cover_class, + ALLOCNO_CONFLICT_HARD_REG_COSTS (a)); + ALLOCNO_NREFS (cap) = ALLOCNO_NREFS (a); + ALLOCNO_FREQ (cap) = ALLOCNO_FREQ (a); + ALLOCNO_CALL_FREQ (cap) = ALLOCNO_CALL_FREQ (a); + IOR_HARD_REG_SET (ALLOCNO_CONFLICT_HARD_REGS (cap), + ALLOCNO_CONFLICT_HARD_REGS (a)); + IOR_HARD_REG_SET (ALLOCNO_TOTAL_CONFLICT_HARD_REGS (cap), + ALLOCNO_TOTAL_CONFLICT_HARD_REGS (a)); + ALLOCNO_CALLS_CROSSED_NUM (cap) = ALLOCNO_CALLS_CROSSED_NUM (a); +#ifdef STACK_REGS + ALLOCNO_NO_STACK_REG_P (cap) = ALLOCNO_NO_STACK_REG_P (a); + ALLOCNO_TOTAL_NO_STACK_REG_P (cap) = ALLOCNO_TOTAL_NO_STACK_REG_P (a); +#endif + if (internal_flag_ira_verbose > 2 && ira_dump_file != NULL) + { + fprintf (ira_dump_file, " Creating cap "); + ira_print_expanded_allocno (cap); + fprintf (ira_dump_file, "\n"); + } + return cap; +} + +/* Create and return allocno live range with given attributes. 
*/ +allocno_live_range_t +ira_create_allocno_live_range (ira_allocno_t a, int start, int finish, + allocno_live_range_t next) +{ + allocno_live_range_t p; + + p = (allocno_live_range_t) pool_alloc (allocno_live_range_pool); + p->allocno = a; + p->start = start; + p->finish = finish; + p->next = next; + return p; +} + +/* Copy allocno live range R and return the result. */ +static allocno_live_range_t +copy_allocno_live_range (allocno_live_range_t r) +{ + allocno_live_range_t p; + + p = (allocno_live_range_t) pool_alloc (allocno_live_range_pool); + *p = *r; + return p; +} + +/* Copy allocno live range list given by its head R and return the + result. */ +static allocno_live_range_t +copy_allocno_live_range_list (allocno_live_range_t r) +{ + allocno_live_range_t p, first, last; + + if (r == NULL) + return NULL; + for (first = last = NULL; r != NULL; r = r->next) + { + p = copy_allocno_live_range (r); + if (first == NULL) + first = p; + else + last->next = p; + last = p; + } + return first; +} + +/* Free allocno live range R. */ +void +ira_finish_allocno_live_range (allocno_live_range_t r) +{ + pool_free (allocno_live_range_pool, r); +} + +/* Free updated register costs of allocno A. */ +void +ira_free_allocno_updated_costs (ira_allocno_t a) +{ + enum reg_class cover_class; + + cover_class = ALLOCNO_COVER_CLASS (a); + if (ALLOCNO_UPDATED_HARD_REG_COSTS (a) != NULL) + ira_free_cost_vector (ALLOCNO_UPDATED_HARD_REG_COSTS (a), cover_class); + ALLOCNO_UPDATED_HARD_REG_COSTS (a) = NULL; + if (ALLOCNO_UPDATED_CONFLICT_HARD_REG_COSTS (a) != NULL) + ira_free_cost_vector (ALLOCNO_UPDATED_CONFLICT_HARD_REG_COSTS (a), + cover_class); + ALLOCNO_UPDATED_CONFLICT_HARD_REG_COSTS (a) = NULL; +} + +/* Free the memory allocated for allocno A. 
*/ +static void +finish_allocno (ira_allocno_t a) +{ + allocno_live_range_t r, next_r; + enum reg_class cover_class = ALLOCNO_COVER_CLASS (a); + + ira_allocnos[ALLOCNO_NUM (a)] = NULL; + ira_conflict_id_allocno_map[ALLOCNO_CONFLICT_ID (a)] = NULL; + if (ALLOCNO_CONFLICT_ALLOCNO_ARRAY (a) != NULL) + ira_free (ALLOCNO_CONFLICT_ALLOCNO_ARRAY (a)); + if (ALLOCNO_HARD_REG_COSTS (a) != NULL) + ira_free_cost_vector (ALLOCNO_HARD_REG_COSTS (a), cover_class); + if (ALLOCNO_CONFLICT_HARD_REG_COSTS (a) != NULL) + ira_free_cost_vector (ALLOCNO_CONFLICT_HARD_REG_COSTS (a), cover_class); + if (ALLOCNO_UPDATED_HARD_REG_COSTS (a) != NULL) + ira_free_cost_vector (ALLOCNO_UPDATED_HARD_REG_COSTS (a), cover_class); + if (ALLOCNO_UPDATED_CONFLICT_HARD_REG_COSTS (a) != NULL) + ira_free_cost_vector (ALLOCNO_UPDATED_CONFLICT_HARD_REG_COSTS (a), + cover_class); + for (r = ALLOCNO_LIVE_RANGES (a); r != NULL; r = next_r) + { + next_r = r->next; + ira_finish_allocno_live_range (r); + } + pool_free (allocno_pool, a); +} + +/* Free the memory allocated for all allocnos. */ +static void +finish_allocnos (void) +{ + ira_allocno_t a; + ira_allocno_iterator ai; + + FOR_EACH_ALLOCNO (a, ai) + finish_allocno (a); + ira_free (ira_regno_allocno_map); + VEC_free (ira_allocno_t, heap, ira_conflict_id_allocno_map_vec); + VEC_free (ira_allocno_t, heap, allocno_vec); + free_alloc_pool (allocno_pool); + free_alloc_pool (allocno_live_range_pool); +} + + + +/* Pools for copies. */ +static alloc_pool copy_pool; + +/* Vec containing references to all created copies. It is a + container of array ira_copies. */ +static VEC(ira_copy_t,heap) *copy_vec; + +/* The function initializes data concerning allocno copies. 
*/ +static void +initiate_copies (void) +{ + copy_pool + = create_alloc_pool ("copies", sizeof (struct ira_allocno_copy), 100); + copy_vec = VEC_alloc (ira_copy_t, heap, get_max_uid ()); + ira_copies = NULL; + ira_copies_num = 0; +} + +/* Return copy connecting A1 and A2 and originated from INSN of + LOOP_TREE_NODE if any. */ +static ira_copy_t +find_allocno_copy (ira_allocno_t a1, ira_allocno_t a2, rtx insn, + ira_loop_tree_node_t loop_tree_node) +{ + ira_copy_t cp, next_cp; + ira_allocno_t another_a; + + for (cp = ALLOCNO_COPIES (a1); cp != NULL; cp = next_cp) + { + if (cp->first == a1) + { + next_cp = cp->next_first_allocno_copy; + another_a = cp->second; + } + else if (cp->second == a1) + { + next_cp = cp->next_second_allocno_copy; + another_a = cp->first; + } + else + gcc_unreachable (); + if (another_a == a2 && cp->insn == insn + && cp->loop_tree_node == loop_tree_node) + return cp; + } + return NULL; +} + +/* Create and return copy with given attributes LOOP_TREE_NODE, FIRST, + SECOND, FREQ, and INSN. */ +ira_copy_t +ira_create_copy (ira_allocno_t first, ira_allocno_t second, int freq, rtx insn, + ira_loop_tree_node_t loop_tree_node) +{ + ira_copy_t cp; + + cp = (ira_copy_t) pool_alloc (copy_pool); + cp->num = ira_copies_num; + cp->first = first; + cp->second = second; + cp->freq = freq; + cp->insn = insn; + cp->loop_tree_node = loop_tree_node; + VEC_safe_push (ira_copy_t, heap, copy_vec, cp); + ira_copies = VEC_address (ira_copy_t, copy_vec); + ira_copies_num = VEC_length (ira_copy_t, copy_vec); + return cp; +} + +/* Attach a copy CP to allocnos involved into the copy. 
*/ +void +ira_add_allocno_copy_to_list (ira_copy_t cp) +{ + ira_allocno_t first = cp->first, second = cp->second; + + cp->prev_first_allocno_copy = NULL; + cp->prev_second_allocno_copy = NULL; + cp->next_first_allocno_copy = ALLOCNO_COPIES (first); + if (cp->next_first_allocno_copy != NULL) + { + if (cp->next_first_allocno_copy->first == first) + cp->next_first_allocno_copy->prev_first_allocno_copy = cp; + else + cp->next_first_allocno_copy->prev_second_allocno_copy = cp; + } + cp->next_second_allocno_copy = ALLOCNO_COPIES (second); + if (cp->next_second_allocno_copy != NULL) + { + if (cp->next_second_allocno_copy->second == second) + cp->next_second_allocno_copy->prev_second_allocno_copy = cp; + else + cp->next_second_allocno_copy->prev_first_allocno_copy = cp; + } + ALLOCNO_COPIES (first) = cp; + ALLOCNO_COPIES (second) = cp; +} + +/* Detach a copy CP from allocnos involved into the copy. */ +void +ira_remove_allocno_copy_from_list (ira_copy_t cp) +{ + ira_allocno_t first = cp->first, second = cp->second; + ira_copy_t prev, next; + + next = cp->next_first_allocno_copy; + prev = cp->prev_first_allocno_copy; + if (prev == NULL) + ALLOCNO_COPIES (first) = next; + else if (prev->first == first) + prev->next_first_allocno_copy = next; + else + prev->next_second_allocno_copy = next; + if (next != NULL) + { + if (next->first == first) + next->prev_first_allocno_copy = prev; + else + next->prev_second_allocno_copy = prev; + } + cp->prev_first_allocno_copy = cp->next_first_allocno_copy = NULL; + + next = cp->next_second_allocno_copy; + prev = cp->prev_second_allocno_copy; + if (prev == NULL) + ALLOCNO_COPIES (second) = next; + else if (prev->second == second) + prev->next_second_allocno_copy = next; + else + prev->next_first_allocno_copy = next; + if (next != NULL) + { + if (next->second == second) + next->prev_second_allocno_copy = prev; + else + next->prev_first_allocno_copy = prev; + } + cp->prev_second_allocno_copy = cp->next_second_allocno_copy = NULL; +} + +/* Make 
a copy CP a canonical copy where number of the + first allocno is less than the second one. */ +void +ira_swap_allocno_copy_ends_if_necessary (ira_copy_t cp) +{ + ira_allocno_t temp; + ira_copy_t temp_cp; + + if (ALLOCNO_NUM (cp->first) <= ALLOCNO_NUM (cp->second)) + return; + + temp = cp->first; + cp->first = cp->second; + cp->second = temp; + + temp_cp = cp->prev_first_allocno_copy; + cp->prev_first_allocno_copy = cp->prev_second_allocno_copy; + cp->prev_second_allocno_copy = temp_cp; + + temp_cp = cp->next_first_allocno_copy; + cp->next_first_allocno_copy = cp->next_second_allocno_copy; + cp->next_second_allocno_copy = temp_cp; +} + +/* Create (or update frequency if the copy already exists) and return + the copy of allocnos FIRST and SECOND with frequency FREQ + corresponding to move insn INSN (if any) and originated from + LOOP_TREE_NODE. */ +ira_copy_t +ira_add_allocno_copy (ira_allocno_t first, ira_allocno_t second, int freq, + rtx insn, ira_loop_tree_node_t loop_tree_node) +{ + ira_copy_t cp; + + if ((cp = find_allocno_copy (first, second, insn, loop_tree_node)) != NULL) + { + cp->freq += freq; + return cp; + } + cp = ira_create_copy (first, second, freq, insn, loop_tree_node); + ira_assert (first != NULL && second != NULL); + ira_add_allocno_copy_to_list (cp); + ira_swap_allocno_copy_ends_if_necessary (cp); + return cp; +} + +/* Print info about copies involving allocno A into file F. 
*/ +static void +print_allocno_copies (FILE *f, ira_allocno_t a) +{ + ira_allocno_t another_a; + ira_copy_t cp, next_cp; + + fprintf (f, " a%d(r%d):", ALLOCNO_NUM (a), ALLOCNO_REGNO (a)); + for (cp = ALLOCNO_COPIES (a); cp != NULL; cp = next_cp) + { + if (cp->first == a) + { + next_cp = cp->next_first_allocno_copy; + another_a = cp->second; + } + else if (cp->second == a) + { + next_cp = cp->next_second_allocno_copy; + another_a = cp->first; + } + else + gcc_unreachable (); + fprintf (f, " cp%d:a%d(r%d)@%d", cp->num, + ALLOCNO_NUM (another_a), ALLOCNO_REGNO (another_a), cp->freq); + } + fprintf (f, "\n"); +} + +/* Print info about copies involving allocno A into stderr. */ +void +ira_debug_allocno_copies (ira_allocno_t a) +{ + print_allocno_copies (stderr, a); +} + +/* The function frees memory allocated for copy CP. */ +static void +finish_copy (ira_copy_t cp) +{ + pool_free (copy_pool, cp); +} + + +/* Free memory allocated for all copies. */ +static void +finish_copies (void) +{ + ira_copy_t cp; + ira_copy_iterator ci; + + FOR_EACH_COPY (cp, ci) + finish_copy (cp); + VEC_free (ira_copy_t, heap, copy_vec); + free_alloc_pool (copy_pool); +} + + + +/* Pools for cost vectors. It is defined only for cover classes. */ +static alloc_pool cost_vector_pool[N_REG_CLASSES]; + +/* The function initiates work with hard register cost vectors. It + creates allocation pool for each cover class. */ +static void +initiate_cost_vectors (void) +{ + int i; + enum reg_class cover_class; + + for (i = 0; i < ira_reg_class_cover_size; i++) + { + cover_class = ira_reg_class_cover[i]; + cost_vector_pool[cover_class] + = create_alloc_pool ("cost vectors", + sizeof (int) + * ira_class_hard_regs_num[cover_class], + 100); + } +} + +/* Allocate and return a cost vector VEC for COVER_CLASS. */ +int * +ira_allocate_cost_vector (enum reg_class cover_class) +{ + return (int *) pool_alloc (cost_vector_pool[cover_class]); +} + +/* Free a cost vector VEC for COVER_CLASS. 
*/ +void +ira_free_cost_vector (int *vec, enum reg_class cover_class) +{ + ira_assert (vec != NULL); + pool_free (cost_vector_pool[cover_class], vec); +} + +/* Finish work with hard register cost vectors. Release allocation + pool for each cover class. */ +static void +finish_cost_vectors (void) +{ + int i; + enum reg_class cover_class; + + for (i = 0; i < ira_reg_class_cover_size; i++) + { + cover_class = ira_reg_class_cover[i]; + free_alloc_pool (cost_vector_pool[cover_class]); + } +} + + + +/* The current loop tree node and its regno allocno map. */ +ira_loop_tree_node_t ira_curr_loop_tree_node; +ira_allocno_t *ira_curr_regno_allocno_map; + +/* This recursive function traverses loop tree with root LOOP_NODE + calling non-null functions PREORDER_FUNC and POSTORDER_FUNC + correspondingly in preorder and postorder. The function sets up + IRA_CURR_LOOP_TREE_NODE and IRA_CURR_REGNO_ALLOCNO_MAP. If BB_P, + basic block nodes of LOOP_NODE is also processed (before its + subloop nodes). */ +void +ira_traverse_loop_tree (bool bb_p, ira_loop_tree_node_t loop_node, + void (*preorder_func) (ira_loop_tree_node_t), + void (*postorder_func) (ira_loop_tree_node_t)) +{ + ira_loop_tree_node_t subloop_node; + + ira_assert (loop_node->bb == NULL); + ira_curr_loop_tree_node = loop_node; + ira_curr_regno_allocno_map = ira_curr_loop_tree_node->regno_allocno_map; + + if (preorder_func != NULL) + (*preorder_func) (loop_node); + + if (bb_p) + for (subloop_node = loop_node->children; + subloop_node != NULL; + subloop_node = subloop_node->next) + if (subloop_node->bb != NULL) + { + if (preorder_func != NULL) + (*preorder_func) (subloop_node); + + if (postorder_func != NULL) + (*postorder_func) (subloop_node); + } + + for (subloop_node = loop_node->subloops; + subloop_node != NULL; + subloop_node = subloop_node->subloop_next) + { + ira_assert (subloop_node->bb == NULL); + ira_traverse_loop_tree (bb_p, subloop_node, + preorder_func, postorder_func); + } + + ira_curr_loop_tree_node = 
loop_node; + ira_curr_regno_allocno_map = ira_curr_loop_tree_node->regno_allocno_map; + + if (postorder_func != NULL) + (*postorder_func) (loop_node); +} + + + +/* The basic block currently being processed. */ +static basic_block curr_bb; + +/* This recursive function creates allocnos corresponding to + pseudo-registers containing in X. True OUTPUT_P means that X is + a lvalue. */ +static void +create_insn_allocnos (rtx x, bool output_p) +{ + int i, j; + const char *fmt; + enum rtx_code code = GET_CODE (x); + + if (code == REG) + { + int regno; + + if ((regno = REGNO (x)) >= FIRST_PSEUDO_REGISTER) + { + ira_allocno_t a; + + if ((a = ira_curr_regno_allocno_map[regno]) == NULL) + a = ira_create_allocno (regno, false, ira_curr_loop_tree_node); + + ALLOCNO_NREFS (a)++; + ALLOCNO_FREQ (a) += REG_FREQ_FROM_BB (curr_bb); + bitmap_set_bit (ira_curr_loop_tree_node->mentioned_allocnos, + ALLOCNO_NUM (a)); + if (output_p) + bitmap_set_bit (ira_curr_loop_tree_node->modified_regnos, regno); + } + return; + } + else if (code == SET) + { + create_insn_allocnos (SET_DEST (x), true); + create_insn_allocnos (SET_SRC (x), false); + return; + } + else if (code == CLOBBER) + { + create_insn_allocnos (XEXP (x, 0), true); + return; + } + else if (code == MEM) + { + create_insn_allocnos (XEXP (x, 0), false); + return; + } + else if (code == PRE_DEC || code == POST_DEC || code == PRE_INC || + code == POST_INC || code == POST_MODIFY || code == PRE_MODIFY) + { + create_insn_allocnos (XEXP (x, 0), true); + create_insn_allocnos (XEXP (x, 0), false); + return; + } + + fmt = GET_RTX_FORMAT (code); + for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--) + { + if (fmt[i] == 'e') + create_insn_allocnos (XEXP (x, i), output_p); + else if (fmt[i] == 'E') + for (j = 0; j < XVECLEN (x, i); j++) + create_insn_allocnos (XVECEXP (x, i, j), output_p); + } +} + +/* Create allocnos corresponding to pseudo-registers living in the + basic block represented by the corresponding loop tree node + BB_NODE. 
 */
+static void
+create_bb_allocnos (ira_loop_tree_node_t bb_node)
+{
+  basic_block bb;
+  rtx insn;
+  unsigned int i;
+  bitmap_iterator bi;
+
+  curr_bb = bb = bb_node->bb;
+  ira_assert (bb != NULL);
+  FOR_BB_INSNS (bb, insn)
+    if (INSN_P (insn))
+      create_insn_allocnos (PATTERN (insn), false);
+  /* It might be an allocno living through from one subloop to
+     another.  */
+  EXECUTE_IF_SET_IN_REG_SET (DF_LR_IN (bb), FIRST_PSEUDO_REGISTER, i, bi)
+    if (ira_curr_regno_allocno_map[i] == NULL)
+      ira_create_allocno (i, false, ira_curr_loop_tree_node);
+}
+
+/* Create allocnos corresponding to pseudo-registers living on edge E
+   (a loop entry or exit).  Also mark the allocnos as living on the
+   loop border.  */
+static void
+create_loop_allocnos (edge e)
+{
+  unsigned int i;
+  bitmap live_in_regs, border_allocnos;
+  bitmap_iterator bi;
+  ira_loop_tree_node_t parent;
+
+  live_in_regs = DF_LR_IN (e->dest);
+  border_allocnos = ira_curr_loop_tree_node->border_allocnos;
+  EXECUTE_IF_SET_IN_REG_SET (DF_LR_OUT (e->src),
+			     FIRST_PSEUDO_REGISTER, i, bi)
+    if (bitmap_bit_p (live_in_regs, i))
+      {
+	if (ira_curr_regno_allocno_map[i] == NULL)
+	  {
+	    /* The order of creations is important for right
+	       ira_regno_allocno_map.  */
+	    if ((parent = ira_curr_loop_tree_node->parent) != NULL
+		&& parent->regno_allocno_map[i] == NULL)
+	      ira_create_allocno (i, false, parent);
+	    ira_create_allocno (i, false, ira_curr_loop_tree_node);
+	  }
+	bitmap_set_bit (border_allocnos,
+			ALLOCNO_NUM (ira_curr_regno_allocno_map[i]));
+      }
+}
+
+/* Create allocnos corresponding to pseudo-registers living in loop
+   represented by the corresponding loop tree node LOOP_NODE.  This
+   function is called by ira_traverse_loop_tree.
*/ +static void +create_loop_tree_node_allocnos (ira_loop_tree_node_t loop_node) +{ + if (loop_node->bb != NULL) + create_bb_allocnos (loop_node); + else if (loop_node != ira_loop_tree_root) + { + int i; + edge_iterator ei; + edge e; + VEC (edge, heap) *edges; + + FOR_EACH_EDGE (e, ei, loop_node->loop->header->preds) + if (e->src != loop_node->loop->latch) + create_loop_allocnos (e); + + edges = get_loop_exit_edges (loop_node->loop); + for (i = 0; VEC_iterate (edge, edges, i, e); i++) + create_loop_allocnos (e); + VEC_free (edge, heap, edges); + } +} + +/* Propagate information about allocnos modified inside the loop given + by its LOOP_TREE_NODE to its parent. */ +static void +propagate_modified_regnos (ira_loop_tree_node_t loop_tree_node) +{ + if (loop_tree_node == ira_loop_tree_root) + return; + ira_assert (loop_tree_node->bb == NULL); + bitmap_ior_into (loop_tree_node->parent->modified_regnos, + loop_tree_node->modified_regnos); +} + +/* Propagate new info about allocno A (see comments about accumulated + info in allocno definition) to the corresponding allocno on upper + loop tree level. So allocnos on upper levels accumulate + information about the corresponding allocnos in nested regions. + The new info means allocno info finally calculated in this + file. */ +static void +propagate_allocno_info (void) +{ + int i; + ira_allocno_t a, parent_a; + ira_loop_tree_node_t parent; + enum reg_class cover_class; + + if (flag_ira_algorithm != IRA_ALGORITHM_REGIONAL + && flag_ira_algorithm != IRA_ALGORITHM_MIXED) + return; + for (i = max_reg_num () - 1; i >= FIRST_PSEUDO_REGISTER; i--) + for (a = ira_regno_allocno_map[i]; + a != NULL; + a = ALLOCNO_NEXT_REGNO_ALLOCNO (a)) + if ((parent = ALLOCNO_LOOP_TREE_NODE (a)->parent) != NULL + && (parent_a = parent->regno_allocno_map[i]) != NULL + /* There are no caps yet at this point. So use + border_allocnos to find allocnos for the propagation. 
*/ + && bitmap_bit_p (ALLOCNO_LOOP_TREE_NODE (a)->border_allocnos, + ALLOCNO_NUM (a))) + { + ALLOCNO_NREFS (parent_a) += ALLOCNO_NREFS (a); + ALLOCNO_FREQ (parent_a) += ALLOCNO_FREQ (a); + ALLOCNO_CALL_FREQ (parent_a) += ALLOCNO_CALL_FREQ (a); +#ifdef STACK_REGS + if (ALLOCNO_TOTAL_NO_STACK_REG_P (a)) + ALLOCNO_TOTAL_NO_STACK_REG_P (parent_a) = true; +#endif + IOR_HARD_REG_SET (ALLOCNO_TOTAL_CONFLICT_HARD_REGS (parent_a), + ALLOCNO_TOTAL_CONFLICT_HARD_REGS (a)); + ALLOCNO_CALLS_CROSSED_NUM (parent_a) + += ALLOCNO_CALLS_CROSSED_NUM (a); + ALLOCNO_EXCESS_PRESSURE_POINTS_NUM (parent_a) + += ALLOCNO_EXCESS_PRESSURE_POINTS_NUM (a); + cover_class = ALLOCNO_COVER_CLASS (a); + ira_assert (cover_class == ALLOCNO_COVER_CLASS (parent_a)); + ira_allocate_and_accumulate_costs + (&ALLOCNO_HARD_REG_COSTS (parent_a), cover_class, + ALLOCNO_HARD_REG_COSTS (a)); + ira_allocate_and_accumulate_costs + (&ALLOCNO_CONFLICT_HARD_REG_COSTS (parent_a), + cover_class, + ALLOCNO_CONFLICT_HARD_REG_COSTS (a)); + ALLOCNO_COVER_CLASS_COST (parent_a) + += ALLOCNO_COVER_CLASS_COST (a); + ALLOCNO_MEMORY_COST (parent_a) += ALLOCNO_MEMORY_COST (a); + ALLOCNO_UPDATED_MEMORY_COST (parent_a) + += ALLOCNO_UPDATED_MEMORY_COST (a); + } +} + +/* Create allocnos corresponding to pseudo-registers in the current + function. Traverse the loop tree for this. */ +static void +create_allocnos (void) +{ + /* We need to process BB first to correctly link allocnos by member + next_regno_allocno. */ + ira_traverse_loop_tree (true, ira_loop_tree_root, + create_loop_tree_node_allocnos, NULL); + if (optimize) + ira_traverse_loop_tree (false, ira_loop_tree_root, NULL, + propagate_modified_regnos); +} + + + +/* The page contains function to remove some regions from a separate + register allocation. We remove regions whose separate allocation + will hardly improve the result. As a result we speed up regional + register allocation. */ + +/* Merge ranges R1 and R2 and returns the result. 
The function + maintains the order of ranges and tries to minimize number of the + result ranges. */ +static allocno_live_range_t +merge_ranges (allocno_live_range_t r1, allocno_live_range_t r2) +{ + allocno_live_range_t first, last, temp; + + if (r1 == NULL) + return r2; + if (r2 == NULL) + return r1; + for (first = last = NULL; r1 != NULL && r2 != NULL;) + { + if (r1->start < r2->start) + { + temp = r1; + r1 = r2; + r2 = temp; + } + if (r1->start <= r2->finish + 1) + { + /* Intersected ranges: merge r1 and r2 into r1. */ + r1->start = r2->start; + if (r1->finish < r2->finish) + r1->finish = r2->finish; + temp = r2; + r2 = r2->next; + ira_finish_allocno_live_range (temp); + if (r2 == NULL) + { + /* To try to merge with subsequent ranges in r1. */ + r2 = r1->next; + r1->next = NULL; + } + } + else + { + /* Add r1 to the result. */ + if (first == NULL) + first = last = r1; + else + { + last->next = r1; + last = r1; + } + r1 = r1->next; + if (r1 == NULL) + { + /* To try to merge with subsequent ranges in r2. */ + r1 = r2->next; + r2->next = NULL; + } + } + } + if (r1 != NULL) + { + if (first == NULL) + first = r1; + else + last->next = r1; + ira_assert (r1->next == NULL); + } + else if (r2 != NULL) + { + if (first == NULL) + first = r2; + else + last->next = r2; + ira_assert (r2->next == NULL); + } + else + { + ira_assert (last->next == NULL); + } + return first; +} + +/* The function changes allocno in range list given by R onto A. */ +static void +change_allocno_in_range_list (allocno_live_range_t r, ira_allocno_t a) +{ + for (; r != NULL; r = r->next) + r->allocno = a; +} + +/* Return TRUE if NODE represents a loop with low register + pressure. 
 */
+static bool
+low_pressure_loop_node_p (ira_loop_tree_node_t node)
+{
+  int i;
+  enum reg_class cover_class;
+
+  if (node->bb != NULL)
+    return false;
+
+  for (i = 0; i < ira_reg_class_cover_size; i++)
+    {
+      cover_class = ira_reg_class_cover[i];
+      if (node->reg_pressure[cover_class]
+	  > ira_available_class_regs[cover_class])
+	return false;
+    }
+  return true;
+}
+
+/* Return TRUE if NODE represents a loop which should be removed from
+   regional allocation.  We remove a loop with low register pressure
+   inside another loop with register pressure.  In this case a
+   separate allocation of the loop hardly helps (for irregular
+   register file architecture it could help by choosing a better hard
+   register in the loop but we prefer faster allocation even in this
+   case).  */
+static bool
+loop_node_to_be_removed_p (ira_loop_tree_node_t node)
+{
+  return (node->parent != NULL && low_pressure_loop_node_p (node->parent)
+	  && low_pressure_loop_node_p (node));
+}
+
+/* Definition of vector of loop tree nodes.  */
+DEF_VEC_P(ira_loop_tree_node_t);
+DEF_VEC_ALLOC_P(ira_loop_tree_node_t, heap);
+
+/* Vec containing references to all removed loop tree nodes.  */
+static VEC(ira_loop_tree_node_t,heap) *removed_loop_vec;
+
+/* Vec containing references to all children of loop tree nodes.  */
+static VEC(ira_loop_tree_node_t,heap) *children_vec;
+
+/* Remove subregions of NODE if their separate allocation will not
+   improve the result.  */
+static void
+remove_uneccesary_loop_nodes_from_loop_tree (ira_loop_tree_node_t node)
+{
+  unsigned int start;
+  bool remove_p;
+  ira_loop_tree_node_t subnode;
+
+  remove_p = loop_node_to_be_removed_p (node);
+  if (! 
remove_p) + VEC_safe_push (ira_loop_tree_node_t, heap, children_vec, node); + start = VEC_length (ira_loop_tree_node_t, children_vec); + for (subnode = node->children; subnode != NULL; subnode = subnode->next) + if (subnode->bb == NULL) + remove_uneccesary_loop_nodes_from_loop_tree (subnode); + else + VEC_safe_push (ira_loop_tree_node_t, heap, children_vec, subnode); + node->children = node->subloops = NULL; + if (remove_p) + { + VEC_safe_push (ira_loop_tree_node_t, heap, removed_loop_vec, node); + return; + } + while (VEC_length (ira_loop_tree_node_t, children_vec) > start) + { + subnode = VEC_pop (ira_loop_tree_node_t, children_vec); + subnode->parent = node; + subnode->next = node->children; + node->children = subnode; + if (subnode->bb == NULL) + { + subnode->subloop_next = node->subloops; + node->subloops = subnode; + } + } +} + +/* Remove allocnos from loops removed from the allocation + consideration. */ +static void +remove_unnecessary_allocnos (void) +{ + int regno; + bool merged_p; + enum reg_class cover_class; + ira_allocno_t a, prev_a, next_a, parent_a; + ira_loop_tree_node_t a_node, parent; + allocno_live_range_t r; + + merged_p = false; + for (regno = max_reg_num () - 1; regno >= FIRST_PSEUDO_REGISTER; regno--) + for (prev_a = NULL, a = ira_regno_allocno_map[regno]; + a != NULL; + a = next_a) + { + next_a = ALLOCNO_NEXT_REGNO_ALLOCNO (a); + a_node = ALLOCNO_LOOP_TREE_NODE (a); + if (! loop_node_to_be_removed_p (a_node)) + prev_a = a; + else + { + for (parent = a_node->parent; + (parent_a = parent->regno_allocno_map[regno]) == NULL + && loop_node_to_be_removed_p (parent); + parent = parent->parent) + ; + if (parent_a == NULL) + { + /* There are no allocnos with the same regno in upper + region -- just move the allocno to the upper + region. 
*/ + prev_a = a; + ALLOCNO_LOOP_TREE_NODE (a) = parent; + parent->regno_allocno_map[regno] = a; + bitmap_set_bit (parent->mentioned_allocnos, ALLOCNO_NUM (a)); + } + else + { + /* Remove the allocno and update info of allocno in + the upper region. */ + if (prev_a == NULL) + ira_regno_allocno_map[regno] = next_a; + else + ALLOCNO_NEXT_REGNO_ALLOCNO (prev_a) = next_a; + r = ALLOCNO_LIVE_RANGES (a); + change_allocno_in_range_list (r, parent_a); + ALLOCNO_LIVE_RANGES (parent_a) + = merge_ranges (r, ALLOCNO_LIVE_RANGES (parent_a)); + merged_p = true; + ALLOCNO_LIVE_RANGES (a) = NULL; + IOR_HARD_REG_SET (ALLOCNO_CONFLICT_HARD_REGS (parent_a), + ALLOCNO_CONFLICT_HARD_REGS (a)); +#ifdef STACK_REGS + if (ALLOCNO_NO_STACK_REG_P (a)) + ALLOCNO_NO_STACK_REG_P (parent_a) = true; +#endif + ALLOCNO_NREFS (parent_a) += ALLOCNO_NREFS (a); + ALLOCNO_FREQ (parent_a) += ALLOCNO_FREQ (a); + ALLOCNO_CALL_FREQ (parent_a) += ALLOCNO_CALL_FREQ (a); + IOR_HARD_REG_SET + (ALLOCNO_TOTAL_CONFLICT_HARD_REGS (parent_a), + ALLOCNO_TOTAL_CONFLICT_HARD_REGS (a)); + ALLOCNO_CALLS_CROSSED_NUM (parent_a) + += ALLOCNO_CALLS_CROSSED_NUM (a); + ALLOCNO_EXCESS_PRESSURE_POINTS_NUM (parent_a) + += ALLOCNO_EXCESS_PRESSURE_POINTS_NUM (a); +#ifdef STACK_REGS + if (ALLOCNO_TOTAL_NO_STACK_REG_P (a)) + ALLOCNO_TOTAL_NO_STACK_REG_P (parent_a) = true; +#endif + cover_class = ALLOCNO_COVER_CLASS (a); + ira_assert (cover_class == ALLOCNO_COVER_CLASS (parent_a)); + ira_allocate_and_accumulate_costs + (&ALLOCNO_HARD_REG_COSTS (parent_a), cover_class, + ALLOCNO_HARD_REG_COSTS (a)); + ira_allocate_and_accumulate_costs + (&ALLOCNO_CONFLICT_HARD_REG_COSTS (parent_a), + cover_class, + ALLOCNO_CONFLICT_HARD_REG_COSTS (a)); + ALLOCNO_COVER_CLASS_COST (parent_a) + += ALLOCNO_COVER_CLASS_COST (a); + ALLOCNO_MEMORY_COST (parent_a) += ALLOCNO_MEMORY_COST (a); + ALLOCNO_UPDATED_MEMORY_COST (parent_a) + += ALLOCNO_UPDATED_MEMORY_COST (a); + finish_allocno (a); + } + } + } + if (merged_p) + ira_rebuild_start_finish_chains (); +} + 
+/* Remove loops from consideration. We remove loops for which a + separate allocation will not improve the result. We have to do + this after allocno creation and their costs and cover class + evaluation because only after that the register pressure can be + known and is calculated. */ +static void +remove_unnecessary_regions (void) +{ + children_vec + = VEC_alloc (ira_loop_tree_node_t, heap, + last_basic_block + VEC_length (loop_p, ira_loops.larray)); + removed_loop_vec + = VEC_alloc (ira_loop_tree_node_t, heap, + last_basic_block + VEC_length (loop_p, ira_loops.larray)); + remove_uneccesary_loop_nodes_from_loop_tree (ira_loop_tree_root) ; + VEC_free (ira_loop_tree_node_t, heap, children_vec); + remove_unnecessary_allocnos (); + while (VEC_length (ira_loop_tree_node_t, removed_loop_vec) > 0) + finish_loop_tree_node (VEC_pop (ira_loop_tree_node_t, removed_loop_vec)); + VEC_free (ira_loop_tree_node_t, heap, removed_loop_vec); +} + + + +/* Set up minimal and maximal live range points for allocnos. */ +static void +setup_min_max_allocno_live_range_point (void) +{ + int i; + ira_allocno_t a, parent_a, cap; + ira_allocno_iterator ai; + allocno_live_range_t r; + ira_loop_tree_node_t parent; + + FOR_EACH_ALLOCNO (a, ai) + { + r = ALLOCNO_LIVE_RANGES (a); + if (r == NULL) + continue; + ALLOCNO_MAX (a) = r->finish; + for (; r->next != NULL; r = r->next) + ; + ALLOCNO_MIN (a) = r->start; + } + for (i = max_reg_num () - 1; i >= FIRST_PSEUDO_REGISTER; i--) + for (a = ira_regno_allocno_map[i]; + a != NULL; + a = ALLOCNO_NEXT_REGNO_ALLOCNO (a)) + { + if (ALLOCNO_MAX (a) < 0) + continue; + ira_assert (ALLOCNO_CAP_MEMBER (a) == NULL); + /* Accumulation of range info. 
 */
+	if (ALLOCNO_CAP (a) != NULL)
+	  {
+	    for (cap = ALLOCNO_CAP (a); cap != NULL; cap = ALLOCNO_CAP (cap))
+	      {
+		if (ALLOCNO_MAX (cap) < ALLOCNO_MAX (a))
+		  ALLOCNO_MAX (cap) = ALLOCNO_MAX (a);
+		if (ALLOCNO_MIN (cap) > ALLOCNO_MIN (a))
+		  ALLOCNO_MIN (cap) = ALLOCNO_MIN (a);
+	      }
+	    continue;
+	  }
+	if ((parent = ALLOCNO_LOOP_TREE_NODE (a)->parent) == NULL)
+	  continue;
+	parent_a = parent->regno_allocno_map[i];
+	if (ALLOCNO_MAX (parent_a) < ALLOCNO_MAX (a))
+	  ALLOCNO_MAX (parent_a) = ALLOCNO_MAX (a);
+	if (ALLOCNO_MIN (parent_a) > ALLOCNO_MIN (a))
+	  ALLOCNO_MIN (parent_a) = ALLOCNO_MIN (a);
+      }
+#ifdef ENABLE_IRA_CHECKING
+  FOR_EACH_ALLOCNO (a, ai)
+    {
+      if ((0 <= ALLOCNO_MIN (a) && ALLOCNO_MIN (a) <= ira_max_point)
+	  && (0 <= ALLOCNO_MAX (a) && ALLOCNO_MAX (a) <= ira_max_point))
+	continue;
+      gcc_unreachable ();
+    }
+#endif
+}
+
+/* Sort allocnos according to their live ranges.  Allocnos with
+   smaller cover class are put first.  Allocnos with the same cover
+   class are ordered according to their start (min).  Allocnos with the
+   same start are ordered according to their finish (max).  */
+static int
+allocno_range_compare_func (const void *v1p, const void *v2p)
+{
+  int diff;
+  ira_allocno_t a1 = *(const ira_allocno_t *) v1p;
+  ira_allocno_t a2 = *(const ira_allocno_t *) v2p;
+
+  if ((diff = ALLOCNO_COVER_CLASS (a1) - ALLOCNO_COVER_CLASS (a2)) != 0)
+    return diff;
+  if ((diff = ALLOCNO_MIN (a1) - ALLOCNO_MIN (a2)) != 0)
+    return diff;
+  if ((diff = ALLOCNO_MAX (a1) - ALLOCNO_MAX (a2)) != 0)
+    return diff;
+  return ALLOCNO_NUM (a1) - ALLOCNO_NUM (a2);
+}
+
+/* Sort ira_conflict_id_allocno_map and set up conflict id of
+   allocnos. 
*/ +static void +sort_conflict_id_allocno_map (void) +{ + int i, num; + ira_allocno_t a; + ira_allocno_iterator ai; + + num = 0; + FOR_EACH_ALLOCNO (a, ai) + ira_conflict_id_allocno_map[num++] = a; + qsort (ira_conflict_id_allocno_map, num, sizeof (ira_allocno_t), + allocno_range_compare_func); + for (i = 0; i < num; i++) + if ((a = ira_conflict_id_allocno_map[i]) != NULL) + ALLOCNO_CONFLICT_ID (a) = i; + for (i = num; i < ira_allocnos_num; i++) + ira_conflict_id_allocno_map[i] = NULL; +} + +/* Set up minimal and maximal conflict ids of allocnos with which + given allocno can conflict. */ +static void +setup_min_max_conflict_allocno_ids (void) +{ + enum reg_class cover_class; + int i, j, min, max, start, finish, first_not_finished, filled_area_start; + int *live_range_min, *last_lived; + ira_allocno_t a; + + live_range_min = (int *) ira_allocate (sizeof (int) * ira_allocnos_num); + cover_class = -1; + first_not_finished = -1; + for (i = 0; i < ira_allocnos_num; i++) + { + a = ira_conflict_id_allocno_map[i]; + if (a == NULL) + continue; + if (cover_class != ALLOCNO_COVER_CLASS (a)) + { + cover_class = ALLOCNO_COVER_CLASS (a); + min = i; + first_not_finished = i; + } + else + { + start = ALLOCNO_MIN (a); + /* If we skip an allocno, the allocno with smaller ids will + be also skipped because of the secondary sorting the + range finishes (see function + allocno_range_compare_func). */ + while (first_not_finished < i + && start > ALLOCNO_MAX (ira_conflict_id_allocno_map + [first_not_finished])) + first_not_finished++; + min = first_not_finished; + } + if (min == i) + /* We could increase min further in this case but it is good + enough. 
*/ + min++; + live_range_min[i] = ALLOCNO_MIN (a); + ALLOCNO_MIN (a) = min; + } + last_lived = (int *) ira_allocate (sizeof (int) * ira_max_point); + cover_class = -1; + filled_area_start = -1; + for (i = ira_allocnos_num - 1; i >= 0; i--) + { + a = ira_conflict_id_allocno_map[i]; + if (a == NULL) + continue; + if (cover_class != ALLOCNO_COVER_CLASS (a)) + { + cover_class = ALLOCNO_COVER_CLASS (a); + for (j = 0; j < ira_max_point; j++) + last_lived[j] = -1; + filled_area_start = ira_max_point; + } + min = live_range_min[i]; + finish = ALLOCNO_MAX (a); + max = last_lived[finish]; + if (max < 0) + /* We could decrease max further in this case but it is good + enough. */ + max = ALLOCNO_CONFLICT_ID (a) - 1; + ALLOCNO_MAX (a) = max; + /* In filling, we can go further A range finish to recognize + intersection quickly because if the finish of subsequently + processed allocno (it has smaller conflict id) range is + further A range finish than they are definitely intersected + (the reason for this is the allocnos with bigger conflict id + have their range starts not smaller than allocnos with + smaller ids. */ + for (j = min; j < filled_area_start; j++) + last_lived[j] = i; + filled_area_start = min; + } + ira_free (last_lived); + ira_free (live_range_min); +} + + + +static void +create_caps (void) +{ + ira_allocno_t a; + ira_allocno_iterator ai; + ira_loop_tree_node_t loop_tree_node; + + FOR_EACH_ALLOCNO (a, ai) + { + if (ALLOCNO_LOOP_TREE_NODE (a) == ira_loop_tree_root) + continue; + if (ALLOCNO_CAP_MEMBER (a) != NULL) + create_cap_allocno (a); + else if (ALLOCNO_CAP (a) == NULL) + { + loop_tree_node = ALLOCNO_LOOP_TREE_NODE (a); + if (!bitmap_bit_p (loop_tree_node->border_allocnos, ALLOCNO_NUM (a))) + create_cap_allocno (a); + } + } +} + + + +/* The page contains code transforming more one region internal + representation (IR) to one region IR which is necessary for reload. + This transformation is called IR flattening. 
We might just rebuild + the IR for one region but we don't do it because it takes a lot of + time. */ + +/* This recursive function returns immediate common dominator of two + loop tree nodes N1 and N2. */ +static ira_loop_tree_node_t +common_loop_tree_node_dominator (ira_loop_tree_node_t n1, + ira_loop_tree_node_t n2) +{ + ira_assert (n1 != NULL && n2 != NULL); + if (n1 == n2) + return n1; + if (n1->level < n2->level) + return common_loop_tree_node_dominator (n1, n2->parent); + else if (n1->level > n2->level) + return common_loop_tree_node_dominator (n1->parent, n2); + else + return common_loop_tree_node_dominator (n1->parent, n2->parent); +} + +/* Flatten the IR. In other words, this function transforms IR as if + it were built with one region (without loops). We could make it + much simpler by rebuilding IR with one region, but unfortunately it + takes a lot of time. MAX_REGNO_BEFORE_EMIT and + IRA_MAX_POINT_BEFORE_EMIT are correspondingly MAX_REG_NUM () and + IRA_MAX_POINT before emitting insns on the loop borders. */ +void +ira_flattening (int max_regno_before_emit, int ira_max_point_before_emit) +{ + int i, j, num; + bool propagate_p, stop_p, keep_p; + int hard_regs_num; + bool new_pseudos_p, merged_p; + unsigned int n; + enum reg_class cover_class; + ira_allocno_t a, parent_a, first, second, node_first, node_second; + ira_allocno_t dominator_a; + ira_copy_t cp; + ira_loop_tree_node_t parent, node, dominator; + allocno_live_range_t r; + ira_allocno_iterator ai; + ira_copy_iterator ci; + sparseset allocnos_live; + /* Map: regno -> allocnos which will finally represent the regno for + IR with one region. 
*/ + ira_allocno_t *regno_top_level_allocno_map; + bool *allocno_propagated_p; + + regno_top_level_allocno_map + = (ira_allocno_t *) ira_allocate (max_reg_num () * sizeof (ira_allocno_t)); + memset (regno_top_level_allocno_map, 0, + max_reg_num () * sizeof (ira_allocno_t)); + allocno_propagated_p + = (bool *) ira_allocate (ira_allocnos_num * sizeof (bool)); + memset (allocno_propagated_p, 0, ira_allocnos_num * sizeof (bool)); + new_pseudos_p = merged_p = false; + /* Fix final allocno attributes. */ + for (i = max_regno_before_emit - 1; i >= FIRST_PSEUDO_REGISTER; i--) + { + propagate_p = false; + for (a = ira_regno_allocno_map[i]; + a != NULL; + a = ALLOCNO_NEXT_REGNO_ALLOCNO (a)) + { + ira_assert (ALLOCNO_CAP_MEMBER (a) == NULL); + if (ALLOCNO_SOMEWHERE_RENAMED_P (a)) + new_pseudos_p = true; + if (ALLOCNO_CAP (a) != NULL + || (parent = ALLOCNO_LOOP_TREE_NODE (a)->parent) == NULL + || ((parent_a = parent->regno_allocno_map[ALLOCNO_REGNO (a)]) + == NULL)) + { + ALLOCNO_COPIES (a) = NULL; + regno_top_level_allocno_map[REGNO (ALLOCNO_REG (a))] = a; + continue; + } + ira_assert (ALLOCNO_CAP_MEMBER (parent_a) == NULL); + if (propagate_p) + { + if (!allocno_propagated_p [ALLOCNO_NUM (parent_a)]) + COPY_HARD_REG_SET (ALLOCNO_TOTAL_CONFLICT_HARD_REGS (parent_a), + ALLOCNO_CONFLICT_HARD_REGS (parent_a)); + IOR_HARD_REG_SET (ALLOCNO_TOTAL_CONFLICT_HARD_REGS (parent_a), + ALLOCNO_TOTAL_CONFLICT_HARD_REGS (a)); +#ifdef STACK_REGS + if (!allocno_propagated_p [ALLOCNO_NUM (parent_a)]) + ALLOCNO_TOTAL_NO_STACK_REG_P (parent_a) + = ALLOCNO_NO_STACK_REG_P (parent_a); + ALLOCNO_TOTAL_NO_STACK_REG_P (parent_a) + = (ALLOCNO_TOTAL_NO_STACK_REG_P (parent_a) + || ALLOCNO_TOTAL_NO_STACK_REG_P (a)); +#endif + allocno_propagated_p [ALLOCNO_NUM (parent_a)] = true; + } + if (REGNO (ALLOCNO_REG (a)) == REGNO (ALLOCNO_REG (parent_a))) + { + if (internal_flag_ira_verbose > 4 && ira_dump_file != NULL) + { + fprintf (ira_dump_file, + " Moving ranges of a%dr%d to a%dr%d: ", + ALLOCNO_NUM (a), REGNO 
(ALLOCNO_REG (a)), + ALLOCNO_NUM (parent_a), + REGNO (ALLOCNO_REG (parent_a))); + ira_print_live_range_list (ira_dump_file, + ALLOCNO_LIVE_RANGES (a)); + } + change_allocno_in_range_list (ALLOCNO_LIVE_RANGES (a), parent_a); + ALLOCNO_LIVE_RANGES (parent_a) + = merge_ranges (ALLOCNO_LIVE_RANGES (a), + ALLOCNO_LIVE_RANGES (parent_a)); + merged_p = true; + ALLOCNO_LIVE_RANGES (a) = NULL; + ALLOCNO_MEM_OPTIMIZED_DEST_P (parent_a) + = (ALLOCNO_MEM_OPTIMIZED_DEST_P (parent_a) + || ALLOCNO_MEM_OPTIMIZED_DEST_P (a)); + continue; + } + new_pseudos_p = true; + propagate_p = true; + first = ALLOCNO_MEM_OPTIMIZED_DEST (a) == NULL ? NULL : a; + stop_p = false; + for (;;) + { + if (first == NULL + && ALLOCNO_MEM_OPTIMIZED_DEST (parent_a) != NULL) + first = parent_a; + ALLOCNO_NREFS (parent_a) -= ALLOCNO_NREFS (a); + ALLOCNO_FREQ (parent_a) -= ALLOCNO_FREQ (a); + if (first != NULL + && ALLOCNO_MEM_OPTIMIZED_DEST (first) == parent_a) + stop_p = true; + else if (!stop_p) + { + ALLOCNO_CALL_FREQ (parent_a) -= ALLOCNO_CALL_FREQ (a); + ALLOCNO_CALLS_CROSSED_NUM (parent_a) + -= ALLOCNO_CALLS_CROSSED_NUM (a); + ALLOCNO_EXCESS_PRESSURE_POINTS_NUM (parent_a) + -= ALLOCNO_EXCESS_PRESSURE_POINTS_NUM (a); + } + ira_assert (ALLOCNO_CALLS_CROSSED_NUM (parent_a) >= 0 + && ALLOCNO_NREFS (parent_a) >= 0 + && ALLOCNO_FREQ (parent_a) >= 0); + cover_class = ALLOCNO_COVER_CLASS (parent_a); + hard_regs_num = ira_class_hard_regs_num[cover_class]; + if (ALLOCNO_HARD_REG_COSTS (a) != NULL + && ALLOCNO_HARD_REG_COSTS (parent_a) != NULL) + for (j = 0; j < hard_regs_num; j++) + ALLOCNO_HARD_REG_COSTS (parent_a)[j] + -= ALLOCNO_HARD_REG_COSTS (a)[j]; + if (ALLOCNO_CONFLICT_HARD_REG_COSTS (a) != NULL + && ALLOCNO_CONFLICT_HARD_REG_COSTS (parent_a) != NULL) + for (j = 0; j < hard_regs_num; j++) + ALLOCNO_CONFLICT_HARD_REG_COSTS (parent_a)[j] + -= ALLOCNO_CONFLICT_HARD_REG_COSTS (a)[j]; + ALLOCNO_COVER_CLASS_COST (parent_a) + -= ALLOCNO_COVER_CLASS_COST (a); + ALLOCNO_MEMORY_COST (parent_a) -= 
ALLOCNO_MEMORY_COST (a); + if (ALLOCNO_CAP (parent_a) != NULL + || (parent + = ALLOCNO_LOOP_TREE_NODE (parent_a)->parent) == NULL + || (parent_a = (parent->regno_allocno_map + [ALLOCNO_REGNO (parent_a)])) == NULL) + break; + } + if (first != NULL) + { + parent_a = ALLOCNO_MEM_OPTIMIZED_DEST (first); + dominator = common_loop_tree_node_dominator + (ALLOCNO_LOOP_TREE_NODE (parent_a), + ALLOCNO_LOOP_TREE_NODE (first)); + dominator_a = dominator->regno_allocno_map[ALLOCNO_REGNO (a)]; + ira_assert (parent_a != NULL); + stop_p = first != a; + /* Remember that exit can be to a grandparent (not only + to a parent) or a child of the grandparent. */ + for (first = a;;) + { + if (internal_flag_ira_verbose > 4 && ira_dump_file != NULL) + { + fprintf + (ira_dump_file, + " Coping ranges of a%dr%d to a%dr%d: ", + ALLOCNO_NUM (first), REGNO (ALLOCNO_REG (first)), + ALLOCNO_NUM (parent_a), + REGNO (ALLOCNO_REG (parent_a))); + ira_print_live_range_list (ira_dump_file, + ALLOCNO_LIVE_RANGES (first)); + } + r = copy_allocno_live_range_list (ALLOCNO_LIVE_RANGES + (first)); + change_allocno_in_range_list (r, parent_a); + ALLOCNO_LIVE_RANGES (parent_a) + = merge_ranges (r, ALLOCNO_LIVE_RANGES (parent_a)); + merged_p = true; + if (stop_p) + break; + parent = ALLOCNO_LOOP_TREE_NODE (first)->parent; + ira_assert (parent != NULL); + first = parent->regno_allocno_map[ALLOCNO_REGNO (a)]; + ira_assert (first != NULL); + if (first == dominator_a) + break; + } + } + ALLOCNO_COPIES (a) = NULL; + regno_top_level_allocno_map[REGNO (ALLOCNO_REG (a))] = a; + } + } + ira_free (allocno_propagated_p); + ira_assert (new_pseudos_p || ira_max_point_before_emit == ira_max_point); + if (merged_p || ira_max_point_before_emit != ira_max_point) + ira_rebuild_start_finish_chains (); + if (new_pseudos_p) + { + /* Rebuild conflicts. 
*/ + FOR_EACH_ALLOCNO (a, ai) + { + if (a != regno_top_level_allocno_map[REGNO (ALLOCNO_REG (a))] + || ALLOCNO_CAP_MEMBER (a) != NULL) + continue; + for (r = ALLOCNO_LIVE_RANGES (a); r != NULL; r = r->next) + ira_assert (r->allocno == a); + clear_allocno_conflicts (a); + } + allocnos_live = sparseset_alloc (ira_allocnos_num); + for (i = 0; i < ira_max_point; i++) + { + for (r = ira_start_point_ranges[i]; r != NULL; r = r->start_next) + { + a = r->allocno; + if (a != regno_top_level_allocno_map[REGNO (ALLOCNO_REG (a))] + || ALLOCNO_CAP_MEMBER (a) != NULL) + continue; + num = ALLOCNO_NUM (a); + cover_class = ALLOCNO_COVER_CLASS (a); + sparseset_set_bit (allocnos_live, num); + EXECUTE_IF_SET_IN_SPARSESET (allocnos_live, n) + { + ira_allocno_t live_a = ira_allocnos[n]; + + if (cover_class == ALLOCNO_COVER_CLASS (live_a) + /* Don't set up conflict for the allocno with itself. */ + && num != (int) n) + ira_add_allocno_conflict (a, live_a); + } + } + + for (r = ira_finish_point_ranges[i]; r != NULL; r = r->finish_next) + sparseset_clear_bit (allocnos_live, ALLOCNO_NUM (r->allocno)); + } + sparseset_free (allocnos_live); + compress_conflict_vecs (); + } + /* Mark some copies for removing and change allocnos in the rest + copies. */ + FOR_EACH_COPY (cp, ci) + { + if (ALLOCNO_CAP_MEMBER (cp->first) != NULL + || ALLOCNO_CAP_MEMBER (cp->second) != NULL) + { + if (internal_flag_ira_verbose > 4 && ira_dump_file != NULL) + fprintf + (ira_dump_file, " Remove cp%d:%c%dr%d-%c%dr%d\n", + cp->num, ALLOCNO_CAP_MEMBER (cp->first) != NULL ? 'c' : 'a', + ALLOCNO_NUM (cp->first), REGNO (ALLOCNO_REG (cp->first)), + ALLOCNO_CAP_MEMBER (cp->second) != NULL ? 
'c' : 'a', + ALLOCNO_NUM (cp->second), REGNO (ALLOCNO_REG (cp->second))); + cp->loop_tree_node = NULL; + continue; + } + first = regno_top_level_allocno_map[REGNO (ALLOCNO_REG (cp->first))]; + second = regno_top_level_allocno_map[REGNO (ALLOCNO_REG (cp->second))]; + node = cp->loop_tree_node; + if (node == NULL) + keep_p = true; /* It copy generated in ira-emit.c. */ + else + { + /* Check that the copy was not propagated from level on + which we will have different pseudos. */ + node_first = node->regno_allocno_map[ALLOCNO_REGNO (cp->first)]; + node_second = node->regno_allocno_map[ALLOCNO_REGNO (cp->second)]; + keep_p = ((REGNO (ALLOCNO_REG (first)) + == REGNO (ALLOCNO_REG (node_first))) + && (REGNO (ALLOCNO_REG (second)) + == REGNO (ALLOCNO_REG (node_second)))); + } + if (keep_p) + { + cp->loop_tree_node = ira_loop_tree_root; + cp->first = first; + cp->second = second; + } + else + { + cp->loop_tree_node = NULL; + if (internal_flag_ira_verbose > 4 && ira_dump_file != NULL) + fprintf (ira_dump_file, " Remove cp%d:a%dr%d-a%dr%d\n", + cp->num, ALLOCNO_NUM (cp->first), + REGNO (ALLOCNO_REG (cp->first)), ALLOCNO_NUM (cp->second), + REGNO (ALLOCNO_REG (cp->second))); + } + } + /* Remove unnecessary allocnos on lower levels of the loop tree. */ + FOR_EACH_ALLOCNO (a, ai) + { + if (a != regno_top_level_allocno_map[REGNO (ALLOCNO_REG (a))] + || ALLOCNO_CAP_MEMBER (a) != NULL) + { + if (internal_flag_ira_verbose > 4 && ira_dump_file != NULL) + fprintf (ira_dump_file, " Remove a%dr%d\n", + ALLOCNO_NUM (a), REGNO (ALLOCNO_REG (a))); + finish_allocno (a); + continue; + } + ALLOCNO_LOOP_TREE_NODE (a) = ira_loop_tree_root; + ALLOCNO_REGNO (a) = REGNO (ALLOCNO_REG (a)); + ALLOCNO_CAP (a) = NULL; + ALLOCNO_UPDATED_MEMORY_COST (a) = ALLOCNO_MEMORY_COST (a); + if (! 
ALLOCNO_ASSIGNED_P (a)) + ira_free_allocno_updated_costs (a); + ira_assert (ALLOCNO_UPDATED_HARD_REG_COSTS (a) == NULL); + ira_assert (ALLOCNO_UPDATED_CONFLICT_HARD_REG_COSTS (a) == NULL); + } + /* Remove unnecessary copies. */ + FOR_EACH_COPY (cp, ci) + { + if (cp->loop_tree_node == NULL) + { + ira_copies[cp->num] = NULL; + finish_copy (cp); + continue; + } + ira_assert + (ALLOCNO_LOOP_TREE_NODE (cp->first) == ira_loop_tree_root + && ALLOCNO_LOOP_TREE_NODE (cp->second) == ira_loop_tree_root); + ira_add_allocno_copy_to_list (cp); + ira_swap_allocno_copy_ends_if_necessary (cp); + } + rebuild_regno_allocno_maps (); + ira_free (regno_top_level_allocno_map); +} + + + +#ifdef ENABLE_IRA_CHECKING +/* Check creation of all allocnos. Allocnos on lower levels should + have allocnos or caps on all upper levels. */ +static void +check_allocno_creation (void) +{ + ira_allocno_t a; + ira_allocno_iterator ai; + ira_loop_tree_node_t loop_tree_node; + + FOR_EACH_ALLOCNO (a, ai) + { + if (ALLOCNO_LOOP_TREE_NODE (a) == ira_loop_tree_root) + continue; + if (ALLOCNO_CAP_MEMBER (a) != NULL) + { + ira_assert (ALLOCNO_CAP (a) != NULL); + } + else if (ALLOCNO_CAP (a) == NULL) + { + loop_tree_node = ALLOCNO_LOOP_TREE_NODE (a); + ira_assert (loop_tree_node->parent + ->regno_allocno_map[ALLOCNO_REGNO (a)] != NULL + && bitmap_bit_p (loop_tree_node->border_allocnos, + ALLOCNO_NUM (a))); + } + } +} +#endif + +/* Create an internal representation (IR) for IRA (allocnos, copies, + loop tree nodes). If LOOPS_P is FALSE the nodes corresponding to + the loops (except the root which corresponds to the whole function) and + correspondingly allocnos for the loops will not be created. Such a + parameter value is used for Chaitin-Briggs coloring. The function + returns TRUE if we generate loop structure (besides nodes + representing the whole function and the basic blocks) for regional + allocation. A true return means that we really need to flatten IR + before the reload. 
*/ +bool +ira_build (bool loops_p) +{ + df_analyze (); + + initiate_cost_vectors (); + initiate_allocnos (); + initiate_copies (); + create_loop_tree_nodes (loops_p); + form_loop_tree (); + create_allocnos (); + ira_costs (); + ira_create_allocno_live_ranges (); + remove_unnecessary_regions (); + loops_p = more_one_region_p (); + if (loops_p) + { + propagate_allocno_info (); + create_caps (); + } + ira_tune_allocno_costs_and_cover_classes (); +#ifdef ENABLE_IRA_CHECKING + check_allocno_creation (); +#endif + setup_min_max_allocno_live_range_point (); + sort_conflict_id_allocno_map (); + setup_min_max_conflict_allocno_ids (); + ira_build_conflicts (); + if (internal_flag_ira_verbose > 0 && ira_dump_file != NULL) + { + int n, nr; + ira_allocno_t a; + allocno_live_range_t r; + ira_allocno_iterator ai; + + n = 0; + FOR_EACH_ALLOCNO (a, ai) + n += ALLOCNO_CONFLICT_ALLOCNOS_NUM (a); + nr = 0; + FOR_EACH_ALLOCNO (a, ai) + for (r = ALLOCNO_LIVE_RANGES (a); r != NULL; r = r->next) + nr++; + fprintf (ira_dump_file, " regions=%d, blocks=%d, points=%d\n", + VEC_length (loop_p, ira_loops.larray), n_basic_blocks, + ira_max_point); + fprintf (ira_dump_file, + " allocnos=%d, copies=%d, conflicts=%d, ranges=%d\n", + ira_allocnos_num, ira_copies_num, n, nr); + } + return loops_p; +} + +/* Release the data created by function ira_build. */ +void +ira_destroy (void) +{ + finish_loop_tree_nodes (); + finish_copies (); + finish_allocnos (); + finish_cost_vectors (); + ira_finish_allocno_live_ranges (); +} diff --git a/gcc/ira-color.c b/gcc/ira-color.c new file mode 100644 index 00000000000..f3e4673ad6f --- /dev/null +++ b/gcc/ira-color.c @@ -0,0 +1,2955 @@ +/* IRA allocation based on graph coloring. + Copyright (C) 2006, 2007, 2008 + Free Software Foundation, Inc. + Contributed by Vladimir Makarov <vmakarov@redhat.com>. + +This file is part of GCC. 
+ +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "rtl.h" +#include "tm_p.h" +#include "target.h" +#include "regs.h" +#include "flags.h" +#include "sbitmap.h" +#include "bitmap.h" +#include "hard-reg-set.h" +#include "basic-block.h" +#include "expr.h" +#include "toplev.h" +#include "reload.h" +#include "params.h" +#include "df.h" +#include "splay-tree.h" +#include "ira-int.h" + +/* This file contains code for regional graph coloring, spill/restore + code placement optimization, and code helping the reload pass to do + a better job. */ + +/* Bitmap of allocnos which should be colored. */ +static bitmap coloring_allocno_bitmap; + +/* Bitmap of allocnos which should be taken into account during + coloring. In general case it contains allocnos from + coloring_allocno_bitmap plus other already colored conflicting + allocnos. */ +static bitmap consideration_allocno_bitmap; + +/* TRUE if we coalesced some allocnos. In other words, if we got + loops formed by members first_coalesced_allocno and + next_coalesced_allocno containing more one allocno. */ +static bool allocno_coalesced_p; + +/* Bitmap used to prevent a repeated allocno processing because of + coalescing. */ +static bitmap processed_coalesced_allocno_bitmap; + +/* All allocnos sorted according their priorities. 
*/ +static ira_allocno_t *sorted_allocnos; + +/* Vec representing the stack of allocnos used during coloring. */ +static VEC(ira_allocno_t,heap) *allocno_stack_vec; + +/* Array used to choose an allocno for spilling. */ +static ira_allocno_t *allocnos_for_spilling; + +/* Pool for splay tree nodes. */ +static alloc_pool splay_tree_node_pool; + +/* When an allocno is removed from the splay tree, it is put in the + following vector for subsequent inserting it into the splay tree + after putting all colorable allocnos onto the stack. The allocno + could be removed from and inserted to the splay tree every time + when its spilling priority is changed but such solution would be + more costly although simpler. */ +static VEC(ira_allocno_t,heap) *removed_splay_allocno_vec; + + + +/* This page contains functions used to choose hard registers for + allocnos. */ + +/* Array whose element value is TRUE if the corresponding hard + register was already allocated for an allocno. */ +static bool allocated_hardreg_p[FIRST_PSEUDO_REGISTER]; + +/* Array used to check already processed allocnos during the current + update_copy_costs call. */ +static int *allocno_update_cost_check; + +/* The current value of update_copy_cost call count. */ +static int update_cost_check; + +/* Allocate and initialize data necessary for function + update_copy_costs. */ +static void +initiate_cost_update (void) +{ + allocno_update_cost_check + = (int *) ira_allocate (ira_allocnos_num * sizeof (int)); + memset (allocno_update_cost_check, 0, ira_allocnos_num * sizeof (int)); + update_cost_check = 0; +} + +/* Deallocate data used by function update_copy_costs. */ +static void +finish_cost_update (void) +{ + ira_free (allocno_update_cost_check); +} + +/* This recursive function updates costs (decrease if DECR_P) of the + unassigned allocnos connected by copies with ALLOCNO. This update + increases chances to remove some copies. Copy cost is proportional + the copy frequency divided by DIVISOR. 
*/ +static void +update_copy_costs_1 (ira_allocno_t allocno, int hard_regno, + bool decr_p, int divisor) +{ + int i, cost, update_cost; + enum machine_mode mode; + enum reg_class rclass, cover_class; + ira_allocno_t another_allocno; + ira_copy_t cp, next_cp; + + cover_class = ALLOCNO_COVER_CLASS (allocno); + if (cover_class == NO_REGS) + return; + if (allocno_update_cost_check[ALLOCNO_NUM (allocno)] == update_cost_check) + return; + allocno_update_cost_check[ALLOCNO_NUM (allocno)] = update_cost_check; + ira_assert (hard_regno >= 0); + i = ira_class_hard_reg_index[cover_class][hard_regno]; + ira_assert (i >= 0); + rclass = REGNO_REG_CLASS (hard_regno); + mode = ALLOCNO_MODE (allocno); + for (cp = ALLOCNO_COPIES (allocno); cp != NULL; cp = next_cp) + { + if (cp->first == allocno) + { + next_cp = cp->next_first_allocno_copy; + another_allocno = cp->second; + } + else if (cp->second == allocno) + { + next_cp = cp->next_second_allocno_copy; + another_allocno = cp->first; + } + else + gcc_unreachable (); + if (cover_class + != ALLOCNO_COVER_CLASS (another_allocno) + || ALLOCNO_ASSIGNED_P (another_allocno)) + continue; + cost = (cp->second == allocno + ? 
ira_register_move_cost[mode][rclass] + [ALLOCNO_COVER_CLASS (another_allocno)] + : ira_register_move_cost[mode] + [ALLOCNO_COVER_CLASS (another_allocno)][rclass]); + if (decr_p) + cost = -cost; + ira_allocate_and_set_or_copy_costs + (&ALLOCNO_UPDATED_HARD_REG_COSTS (another_allocno), cover_class, + ALLOCNO_COVER_CLASS_COST (another_allocno), + ALLOCNO_HARD_REG_COSTS (another_allocno)); + ira_allocate_and_set_or_copy_costs + (&ALLOCNO_UPDATED_CONFLICT_HARD_REG_COSTS (another_allocno), + cover_class, 0, + ALLOCNO_CONFLICT_HARD_REG_COSTS (another_allocno)); + update_cost = cp->freq * cost / divisor; + ALLOCNO_UPDATED_HARD_REG_COSTS (another_allocno)[i] += update_cost; + ALLOCNO_UPDATED_CONFLICT_HARD_REG_COSTS (another_allocno)[i] + += update_cost; + if (update_cost != 0) + update_copy_costs_1 (another_allocno, hard_regno, + decr_p, divisor * 4); + } +} + +/* Update the cost of allocnos to increase chances to remove some + copies as the result of subsequent assignment. */ +static void +update_copy_costs (ira_allocno_t allocno, bool decr_p) +{ + update_cost_check++; + update_copy_costs_1 (allocno, ALLOCNO_HARD_REGNO (allocno), decr_p, 1); +} + +/* Sort allocnos according to the profit of usage of a hard register + instead of memory for them. */ +static int +allocno_cost_compare_func (const void *v1p, const void *v2p) +{ + ira_allocno_t p1 = *(const ira_allocno_t *) v1p; + ira_allocno_t p2 = *(const ira_allocno_t *) v2p; + int c1, c2; + + c1 = ALLOCNO_UPDATED_MEMORY_COST (p1) - ALLOCNO_COVER_CLASS_COST (p1); + c2 = ALLOCNO_UPDATED_MEMORY_COST (p2) - ALLOCNO_COVER_CLASS_COST (p2); + if (c1 - c2) + return c1 - c2; + + /* If regs are equally good, sort by allocno numbers, so that the + results of qsort leave nothing to chance. */ + return ALLOCNO_NUM (p1) - ALLOCNO_NUM (p2); +} + +/* Print all allocnos coalesced with ALLOCNO. 
*/ +static void +print_coalesced_allocno (ira_allocno_t allocno) +{ + ira_allocno_t a; + + for (a = ALLOCNO_NEXT_COALESCED_ALLOCNO (allocno);; + a = ALLOCNO_NEXT_COALESCED_ALLOCNO (a)) + { + ira_print_expanded_allocno (a); + if (a == allocno) + break; + fprintf (ira_dump_file, "+"); + } +} + +/* Choose a hard register for ALLOCNO (or for all coalesced allocnos + represented by ALLOCNO). If RETRY_P is TRUE, it means that the + function called from function `ira_reassign_conflict_allocnos' and + `allocno_reload_assign'. This function implements the optimistic + coalescing too: if we failed to assign a hard register to set of + the coalesced allocnos, we put them onto the coloring stack for + subsequent separate assigning. */ +static bool +assign_hard_reg (ira_allocno_t allocno, bool retry_p) +{ + HARD_REG_SET conflicting_regs; + int i, j, hard_regno, best_hard_regno, class_size; + int cost, mem_cost, min_cost, full_cost, min_full_cost, add_cost; + int *a_costs; + int *conflict_costs; + enum reg_class cover_class, rclass; + enum machine_mode mode; + ira_allocno_t a, conflict_allocno; + ira_allocno_t another_allocno; + ira_allocno_conflict_iterator aci; + ira_copy_t cp, next_cp; + static int costs[FIRST_PSEUDO_REGISTER], full_costs[FIRST_PSEUDO_REGISTER]; +#ifdef STACK_REGS + bool no_stack_reg_p; +#endif + + ira_assert (! 
ALLOCNO_ASSIGNED_P (allocno)); + cover_class = ALLOCNO_COVER_CLASS (allocno); + class_size = ira_class_hard_regs_num[cover_class]; + mode = ALLOCNO_MODE (allocno); + CLEAR_HARD_REG_SET (conflicting_regs); + best_hard_regno = -1; + memset (full_costs, 0, sizeof (int) * class_size); + mem_cost = 0; + if (allocno_coalesced_p) + bitmap_clear (processed_coalesced_allocno_bitmap); + memset (costs, 0, sizeof (int) * class_size); + memset (full_costs, 0, sizeof (int) * class_size); +#ifdef STACK_REGS + no_stack_reg_p = false; +#endif + for (a = ALLOCNO_NEXT_COALESCED_ALLOCNO (allocno);; + a = ALLOCNO_NEXT_COALESCED_ALLOCNO (a)) + { + mem_cost += ALLOCNO_UPDATED_MEMORY_COST (a); + IOR_HARD_REG_SET (conflicting_regs, + ALLOCNO_TOTAL_CONFLICT_HARD_REGS (a)); + ira_allocate_and_copy_costs (&ALLOCNO_UPDATED_HARD_REG_COSTS (a), + cover_class, ALLOCNO_HARD_REG_COSTS (a)); + a_costs = ALLOCNO_UPDATED_HARD_REG_COSTS (a); +#ifdef STACK_REGS + no_stack_reg_p = no_stack_reg_p || ALLOCNO_TOTAL_NO_STACK_REG_P (a); +#endif + for (cost = ALLOCNO_COVER_CLASS_COST (a), i = 0; i < class_size; i++) + if (a_costs != NULL) + { + costs[i] += a_costs[i]; + full_costs[i] += a_costs[i]; + } + else + { + costs[i] += cost; + full_costs[i] += cost; + } + /* Take preferences of conflicting allocnos into account. */ + FOR_EACH_ALLOCNO_CONFLICT (a, conflict_allocno, aci) + /* Reload can give another class so we need to check all + allocnos. 
*/ + if (retry_p || bitmap_bit_p (consideration_allocno_bitmap, + ALLOCNO_NUM (conflict_allocno))) + { + ira_assert (cover_class == ALLOCNO_COVER_CLASS (conflict_allocno)); + if (allocno_coalesced_p) + { + if (bitmap_bit_p (processed_coalesced_allocno_bitmap, + ALLOCNO_NUM (conflict_allocno))) + continue; + bitmap_set_bit (processed_coalesced_allocno_bitmap, + ALLOCNO_NUM (conflict_allocno)); + } + if (ALLOCNO_ASSIGNED_P (conflict_allocno)) + { + if ((hard_regno = ALLOCNO_HARD_REGNO (conflict_allocno)) >= 0) + { + IOR_HARD_REG_SET + (conflicting_regs, + ira_reg_mode_hard_regset + [hard_regno][ALLOCNO_MODE (conflict_allocno)]); + if (hard_reg_set_subset_p (reg_class_contents[cover_class], + conflicting_regs)) + goto fail; + } + continue; + } + else if (! ALLOCNO_MAY_BE_SPILLED_P (conflict_allocno)) + { + ira_allocate_and_copy_costs + (&ALLOCNO_UPDATED_CONFLICT_HARD_REG_COSTS (conflict_allocno), + cover_class, + ALLOCNO_CONFLICT_HARD_REG_COSTS (conflict_allocno)); + conflict_costs + = ALLOCNO_UPDATED_CONFLICT_HARD_REG_COSTS (conflict_allocno); + if (conflict_costs != NULL) + for (j = class_size - 1; j >= 0; j--) + full_costs[j] -= conflict_costs[j]; + } + } + if (a == allocno) + break; + } + /* Take copies into account. 
*/ + for (a = ALLOCNO_NEXT_COALESCED_ALLOCNO (allocno);; + a = ALLOCNO_NEXT_COALESCED_ALLOCNO (a)) + { + for (cp = ALLOCNO_COPIES (a); cp != NULL; cp = next_cp) + { + if (cp->first == a) + { + next_cp = cp->next_first_allocno_copy; + another_allocno = cp->second; + } + else if (cp->second == a) + { + next_cp = cp->next_second_allocno_copy; + another_allocno = cp->first; + } + else + gcc_unreachable (); + if (cover_class != ALLOCNO_COVER_CLASS (another_allocno) + || ALLOCNO_ASSIGNED_P (another_allocno)) + continue; + ira_allocate_and_copy_costs + (&ALLOCNO_UPDATED_CONFLICT_HARD_REG_COSTS (another_allocno), + cover_class, ALLOCNO_CONFLICT_HARD_REG_COSTS (another_allocno)); + conflict_costs + = ALLOCNO_UPDATED_CONFLICT_HARD_REG_COSTS (another_allocno); + if (conflict_costs != NULL + && ! ALLOCNO_MAY_BE_SPILLED_P (another_allocno)) + for (j = class_size - 1; j >= 0; j--) + full_costs[j] += conflict_costs[j]; + } + if (a == allocno) + break; + } + min_cost = min_full_cost = INT_MAX; + /* We don't care about giving callee saved registers to allocnos no + living through calls because call clobbered registers are + allocated first (it is usual practice to put them first in + REG_ALLOC_ORDER). */ + for (i = 0; i < class_size; i++) + { + hard_regno = ira_class_hard_regs[cover_class][i]; +#ifdef STACK_REGS + if (no_stack_reg_p + && FIRST_STACK_REG <= hard_regno && hard_regno <= LAST_STACK_REG) + continue; +#endif + if (! ira_hard_reg_not_in_set_p (hard_regno, mode, conflicting_regs) + || TEST_HARD_REG_BIT (prohibited_class_mode_regs[cover_class][mode], + hard_regno)) + continue; + cost = costs[i]; + full_cost = full_costs[i]; + if (! allocated_hardreg_p[hard_regno] + && ira_hard_reg_not_in_set_p (hard_regno, mode, call_used_reg_set)) + /* We need to save/restore the hard register in + epilogue/prologue. Therefore we increase the cost. */ + { + /* ??? If only part is call clobbered. 
*/ + rclass = REGNO_REG_CLASS (hard_regno); + add_cost = (ira_memory_move_cost[mode][rclass][0] + + ira_memory_move_cost[mode][rclass][1] - 1); + cost += add_cost; + full_cost += add_cost; + } + if (min_cost > cost) + min_cost = cost; + if (min_full_cost > full_cost) + { + min_full_cost = full_cost; + best_hard_regno = hard_regno; + ira_assert (hard_regno >= 0); + } + } + if (min_full_cost > mem_cost) + { + if (! retry_p && internal_flag_ira_verbose > 3 && ira_dump_file != NULL) + fprintf (ira_dump_file, "(memory is more profitable %d vs %d) ", + mem_cost, min_full_cost); + best_hard_regno = -1; + } + fail: + if (best_hard_regno < 0 + && ALLOCNO_NEXT_COALESCED_ALLOCNO (allocno) != allocno) + { + for (j = 0, a = ALLOCNO_NEXT_COALESCED_ALLOCNO (allocno);; + a = ALLOCNO_NEXT_COALESCED_ALLOCNO (a)) + { + sorted_allocnos[j++] = a; + if (a == allocno) + break; + } + qsort (sorted_allocnos, j, sizeof (ira_allocno_t), + allocno_cost_compare_func); + for (i = 0; i < j; i++) + { + a = sorted_allocnos[i]; + ALLOCNO_FIRST_COALESCED_ALLOCNO (a) = a; + ALLOCNO_NEXT_COALESCED_ALLOCNO (a) = a; + VEC_safe_push (ira_allocno_t, heap, allocno_stack_vec, a); + if (internal_flag_ira_verbose > 3 && ira_dump_file != NULL) + { + fprintf (ira_dump_file, " Pushing"); + print_coalesced_allocno (a); + fprintf (ira_dump_file, "\n"); + } + } + return false; + } + if (best_hard_regno >= 0) + allocated_hardreg_p[best_hard_regno] = true; + for (a = ALLOCNO_NEXT_COALESCED_ALLOCNO (allocno);; + a = ALLOCNO_NEXT_COALESCED_ALLOCNO (a)) + { + ALLOCNO_HARD_REGNO (a) = best_hard_regno; + ALLOCNO_ASSIGNED_P (a) = true; + if (best_hard_regno >= 0) + update_copy_costs (a, true); + ira_assert (ALLOCNO_COVER_CLASS (a) == cover_class); + /* We don't need updated costs anymore: */ + ira_free_allocno_updated_costs (a); + if (a == allocno) + break; + } + return best_hard_regno >= 0; +} + + + +/* This page contains the allocator based on the Chaitin-Briggs algorithm. 
 */ + +/* Bucket of allocnos that can be colored currently without spilling. */ +static ira_allocno_t colorable_allocno_bucket; + +/* Bucket of allocnos that might not be colored currently without + spilling. */ +static ira_allocno_t uncolorable_allocno_bucket; + +/* Each element of the array contains the current number of allocnos + of given *cover* class in the uncolorable_bucket. */ +static int uncolorable_allocnos_num[N_REG_CLASSES]; + +/* Add ALLOCNO to bucket *BUCKET_PTR. ALLOCNO should not be in a bucket + before the call. */ +static void +add_ira_allocno_to_bucket (ira_allocno_t allocno, ira_allocno_t *bucket_ptr) +{ + ira_allocno_t first_allocno; + enum reg_class cover_class; + + if (bucket_ptr == &uncolorable_allocno_bucket + && (cover_class = ALLOCNO_COVER_CLASS (allocno)) != NO_REGS) + { + uncolorable_allocnos_num[cover_class]++; + ira_assert (uncolorable_allocnos_num[cover_class] > 0); + } + first_allocno = *bucket_ptr; + ALLOCNO_NEXT_BUCKET_ALLOCNO (allocno) = first_allocno; + ALLOCNO_PREV_BUCKET_ALLOCNO (allocno) = NULL; + if (first_allocno != NULL) + ALLOCNO_PREV_BUCKET_ALLOCNO (first_allocno) = allocno; + *bucket_ptr = allocno; +} + +/* The function returns frequency and number of available hard + registers for allocnos coalesced with ALLOCNO. */ +static void +get_coalesced_allocnos_attributes (ira_allocno_t allocno, int *freq, int *num) +{ + ira_allocno_t a; + + *freq = 0; + *num = 0; + for (a = ALLOCNO_NEXT_COALESCED_ALLOCNO (allocno);; + a = ALLOCNO_NEXT_COALESCED_ALLOCNO (a)) + { + *freq += ALLOCNO_FREQ (a); + *num += ALLOCNO_AVAILABLE_REGS_NUM (a); + if (a == allocno) + break; + } +} + +/* Compare two allocnos to define which allocno should be pushed first + into the coloring stack. If the return is a negative number, the + allocno given by the first parameter will be pushed first. In this + case such allocno has less priority than the second one and the + hard register will be assigned to it after assignment to the second + one. 
As the result of such assignment order, the second allocno + has a better chance to get the best hard register. */ +static int +bucket_allocno_compare_func (const void *v1p, const void *v2p) +{ + ira_allocno_t a1 = *(const ira_allocno_t *) v1p; + ira_allocno_t a2 = *(const ira_allocno_t *) v2p; + int diff, a1_freq, a2_freq, a1_num, a2_num; + + if ((diff = (int) ALLOCNO_COVER_CLASS (a2) - ALLOCNO_COVER_CLASS (a1)) != 0) + return diff; + get_coalesced_allocnos_attributes (a1, &a1_freq, &a1_num); + get_coalesced_allocnos_attributes (a2, &a2_freq, &a2_num); + if ((diff = a2_num - a1_num) != 0) + return diff; + else if ((diff = a1_freq - a2_freq) != 0) + return diff; + return ALLOCNO_NUM (a2) - ALLOCNO_NUM (a1); +} + +/* Sort bucket *BUCKET_PTR and return the result through + BUCKET_PTR. */ +static void +sort_bucket (ira_allocno_t *bucket_ptr) +{ + ira_allocno_t a, head; + int n; + + for (n = 0, a = *bucket_ptr; a != NULL; a = ALLOCNO_NEXT_BUCKET_ALLOCNO (a)) + sorted_allocnos[n++] = a; + if (n <= 1) + return; + qsort (sorted_allocnos, n, sizeof (ira_allocno_t), + bucket_allocno_compare_func); + head = NULL; + for (n--; n >= 0; n--) + { + a = sorted_allocnos[n]; + ALLOCNO_NEXT_BUCKET_ALLOCNO (a) = head; + ALLOCNO_PREV_BUCKET_ALLOCNO (a) = NULL; + if (head != NULL) + ALLOCNO_PREV_BUCKET_ALLOCNO (head) = a; + head = a; + } + *bucket_ptr = head; +} + +/* Add ALLOCNO to bucket *BUCKET_PTR maintaining the order according + their priority. ALLOCNO should be not in a bucket before the + call. 
*/ +static void +add_ira_allocno_to_ordered_bucket (ira_allocno_t allocno, + ira_allocno_t *bucket_ptr) +{ + ira_allocno_t before, after; + enum reg_class cover_class; + + if (bucket_ptr == &uncolorable_allocno_bucket + && (cover_class = ALLOCNO_COVER_CLASS (allocno)) != NO_REGS) + { + uncolorable_allocnos_num[cover_class]++; + ira_assert (uncolorable_allocnos_num[cover_class] > 0); + } + for (before = *bucket_ptr, after = NULL; + before != NULL; + after = before, before = ALLOCNO_NEXT_BUCKET_ALLOCNO (before)) + if (bucket_allocno_compare_func (&allocno, &before) < 0) + break; + ALLOCNO_NEXT_BUCKET_ALLOCNO (allocno) = before; + ALLOCNO_PREV_BUCKET_ALLOCNO (allocno) = after; + if (after == NULL) + *bucket_ptr = allocno; + else + ALLOCNO_NEXT_BUCKET_ALLOCNO (after) = allocno; + if (before != NULL) + ALLOCNO_PREV_BUCKET_ALLOCNO (before) = allocno; +} + +/* Delete ALLOCNO from bucket *BUCKET_PTR. It should be there before + the call. */ +static void +delete_allocno_from_bucket (ira_allocno_t allocno, ira_allocno_t *bucket_ptr) +{ + ira_allocno_t prev_allocno, next_allocno; + enum reg_class cover_class; + + if (bucket_ptr == &uncolorable_allocno_bucket + && (cover_class = ALLOCNO_COVER_CLASS (allocno)) != NO_REGS) + { + uncolorable_allocnos_num[cover_class]--; + ira_assert (uncolorable_allocnos_num[cover_class] >= 0); + } + prev_allocno = ALLOCNO_PREV_BUCKET_ALLOCNO (allocno); + next_allocno = ALLOCNO_NEXT_BUCKET_ALLOCNO (allocno); + if (prev_allocno != NULL) + ALLOCNO_NEXT_BUCKET_ALLOCNO (prev_allocno) = next_allocno; + else + { + ira_assert (*bucket_ptr == allocno); + *bucket_ptr = next_allocno; + } + if (next_allocno != NULL) + ALLOCNO_PREV_BUCKET_ALLOCNO (next_allocno) = prev_allocno; +} + +/* Splay tree for each cover class. The trees are indexed by the + corresponding cover classes. Splay trees contain uncolorable + allocnos. 
*/ +static splay_tree uncolorable_allocnos_splay_tree[N_REG_CLASSES]; + +/* If the following macro is TRUE, splay tree is used to choose an + allocno of the corresponding cover class for spilling. When the + number uncolorable allocnos of given cover class decreases to some + threshold, linear array search is used to find the best allocno for + spilling. This threshold is actually pretty big because, although + splay trees asymptotically is much faster, each splay tree + operation is sufficiently costly especially taking cache locality + into account. */ +#define USE_SPLAY_P(CLASS) (uncolorable_allocnos_num[CLASS] > 4000) + +/* Put ALLOCNO onto the coloring stack without removing it from its + bucket. Pushing allocno to the coloring stack can result in moving + conflicting allocnos from the uncolorable bucket to the colorable + one. */ +static void +push_ira_allocno_to_stack (ira_allocno_t allocno) +{ + int conflicts_num, conflict_size, size; + ira_allocno_t a, conflict_allocno; + enum reg_class cover_class; + ira_allocno_conflict_iterator aci; + + ALLOCNO_IN_GRAPH_P (allocno) = false; + VEC_safe_push (ira_allocno_t, heap, allocno_stack_vec, allocno); + cover_class = ALLOCNO_COVER_CLASS (allocno); + if (cover_class == NO_REGS) + return; + size = ira_reg_class_nregs[cover_class][ALLOCNO_MODE (allocno)]; + if (allocno_coalesced_p) + bitmap_clear (processed_coalesced_allocno_bitmap); + for (a = ALLOCNO_NEXT_COALESCED_ALLOCNO (allocno);; + a = ALLOCNO_NEXT_COALESCED_ALLOCNO (a)) + { + FOR_EACH_ALLOCNO_CONFLICT (a, conflict_allocno, aci) + if (bitmap_bit_p (coloring_allocno_bitmap, + ALLOCNO_NUM (conflict_allocno))) + { + ira_assert (cover_class == ALLOCNO_COVER_CLASS (conflict_allocno)); + if (allocno_coalesced_p) + { + if (bitmap_bit_p (processed_coalesced_allocno_bitmap, + ALLOCNO_NUM (conflict_allocno))) + continue; + bitmap_set_bit (processed_coalesced_allocno_bitmap, + ALLOCNO_NUM (conflict_allocno)); + } + if (ALLOCNO_IN_GRAPH_P (conflict_allocno) + && ! 
ALLOCNO_ASSIGNED_P (conflict_allocno)) + { + conflicts_num = ALLOCNO_LEFT_CONFLICTS_NUM (conflict_allocno); + conflict_size + = (ira_reg_class_nregs + [cover_class][ALLOCNO_MODE (conflict_allocno)]); + ira_assert + (ALLOCNO_LEFT_CONFLICTS_NUM (conflict_allocno) >= size); + if (conflicts_num + conflict_size + <= ALLOCNO_AVAILABLE_REGS_NUM (conflict_allocno)) + { + ALLOCNO_LEFT_CONFLICTS_NUM (conflict_allocno) -= size; + continue; + } + conflicts_num + = ALLOCNO_LEFT_CONFLICTS_NUM (conflict_allocno) - size; + if (uncolorable_allocnos_splay_tree[cover_class] != NULL + && !ALLOCNO_SPLAY_REMOVED_P (conflict_allocno) + && USE_SPLAY_P (cover_class)) + { + ira_assert + (splay_tree_lookup + (uncolorable_allocnos_splay_tree[cover_class], + (splay_tree_key) conflict_allocno) != NULL); + splay_tree_remove + (uncolorable_allocnos_splay_tree[cover_class], + (splay_tree_key) conflict_allocno); + ALLOCNO_SPLAY_REMOVED_P (conflict_allocno) = true; + VEC_safe_push (ira_allocno_t, heap, + removed_splay_allocno_vec, + conflict_allocno); + } + ALLOCNO_LEFT_CONFLICTS_NUM (conflict_allocno) = conflicts_num; + if (conflicts_num + conflict_size + <= ALLOCNO_AVAILABLE_REGS_NUM (conflict_allocno)) + { + delete_allocno_from_bucket (conflict_allocno, + &uncolorable_allocno_bucket); + add_ira_allocno_to_ordered_bucket (conflict_allocno, + &colorable_allocno_bucket); + } + } + } + if (a == allocno) + break; + } +} + +/* Put ALLOCNO onto the coloring stack and remove it from its bucket. + The allocno is in the colorable bucket if COLORABLE_P is TRUE. 
*/ +static void +remove_allocno_from_bucket_and_push (ira_allocno_t allocno, bool colorable_p) +{ + enum reg_class cover_class; + + if (colorable_p) + delete_allocno_from_bucket (allocno, &colorable_allocno_bucket); + else + delete_allocno_from_bucket (allocno, &uncolorable_allocno_bucket); + if (internal_flag_ira_verbose > 3 && ira_dump_file != NULL) + { + fprintf (ira_dump_file, " Pushing"); + print_coalesced_allocno (allocno); + fprintf (ira_dump_file, "%s\n", colorable_p ? "" : "(potential spill)"); + } + cover_class = ALLOCNO_COVER_CLASS (allocno); + ira_assert ((colorable_p + && (ALLOCNO_LEFT_CONFLICTS_NUM (allocno) + + ira_reg_class_nregs[cover_class][ALLOCNO_MODE (allocno)] + <= ALLOCNO_AVAILABLE_REGS_NUM (allocno))) + || (! colorable_p + && (ALLOCNO_LEFT_CONFLICTS_NUM (allocno) + + ira_reg_class_nregs[cover_class][ALLOCNO_MODE + (allocno)] + > ALLOCNO_AVAILABLE_REGS_NUM (allocno)))); + if (! colorable_p) + ALLOCNO_MAY_BE_SPILLED_P (allocno) = true; + push_ira_allocno_to_stack (allocno); +} + +/* Put all allocnos from colorable bucket onto the coloring stack. */ +static void +push_only_colorable (void) +{ + sort_bucket (&colorable_allocno_bucket); + for (;colorable_allocno_bucket != NULL;) + remove_allocno_from_bucket_and_push (colorable_allocno_bucket, true); +} + +/* Puts ALLOCNO chosen for potential spilling onto the coloring + stack. */ +static void +push_ira_allocno_to_spill (ira_allocno_t allocno) +{ + delete_allocno_from_bucket (allocno, &uncolorable_allocno_bucket); + ALLOCNO_MAY_BE_SPILLED_P (allocno) = true; + if (internal_flag_ira_verbose > 3 && ira_dump_file != NULL) + fprintf (ira_dump_file, " Pushing p%d(%d) (potential spill)\n", + ALLOCNO_NUM (allocno), ALLOCNO_REGNO (allocno)); + push_ira_allocno_to_stack (allocno); +} + +/* Return the frequency of exit edges (if EXIT_P) or entry from/to the + loop given by its LOOP_NODE. 
*/
int
ira_loop_edge_freq (ira_loop_tree_node_t loop_node, int regno, bool exit_p)
{
  int freq, i;
  edge_iterator ei;
  edge e;
  VEC (edge, heap) *edges;

  /* REGNO < 0 means "count every edge"; otherwise only edges on which
     pseudo REGNO is live (live-out of the source AND live-in of the
     destination) are counted.  */
  ira_assert (loop_node->loop != NULL
              && (regno < 0 || regno >= FIRST_PSEUDO_REGISTER));
  freq = 0;
  if (! exit_p)
    {
      /* Entry frequency: sum the header's incoming edges, skipping the
         latch back edge so only entries from outside the loop count.  */
      FOR_EACH_EDGE (e, ei, loop_node->loop->header->preds)
        if (e->src != loop_node->loop->latch
            && (regno < 0
                || (bitmap_bit_p (DF_LR_OUT (e->src), regno)
                    && bitmap_bit_p (DF_LR_IN (e->dest), regno))))
          freq += EDGE_FREQUENCY (e);
    }
  else
    {
      edges = get_loop_exit_edges (loop_node->loop);
      for (i = 0; VEC_iterate (edge, edges, i, e); i++)
        if (regno < 0
            || (bitmap_bit_p (DF_LR_OUT (e->src), regno)
                && bitmap_bit_p (DF_LR_IN (e->dest), regno)))
          freq += EDGE_FREQUENCY (e);
      VEC_free (edge, heap, edges);
    }

  return REG_FREQ_FROM_EDGE_FREQ (freq);
}

/* Calculate and return the cost of putting allocno A into memory.  */
static int
calculate_allocno_spill_cost (ira_allocno_t a)
{
  int regno, cost;
  enum machine_mode mode;
  enum reg_class rclass;
  ira_allocno_t parent_allocno;
  ira_loop_tree_node_t parent_node, loop_node;

  regno = ALLOCNO_REGNO (a);
  /* Base cost: keeping the value in memory instead of the cheapest
     register of the cover class.  */
  cost = ALLOCNO_UPDATED_MEMORY_COST (a) - ALLOCNO_COVER_CLASS_COST (a);
  if (ALLOCNO_CAP (a) != NULL)
    return cost;
  loop_node = ALLOCNO_LOOP_TREE_NODE (a);
  if ((parent_node = loop_node->parent) == NULL)
    return cost;
  if ((parent_allocno = parent_node->regno_allocno_map[regno]) == NULL)
    return cost;
  mode = ALLOCNO_MODE (a);
  rclass = ALLOCNO_COVER_CLASS (a);
  /* Adjust for loads/stores on the loop's entry/exit edges depending
     on whether the allocno for the same regno in the parent region is
     already in memory (spilling here saves edge traffic) or in a hard
     register (spilling here adds memory moves minus the saved
     register-to-register shuffles).  */
  if (ALLOCNO_HARD_REGNO (parent_allocno) < 0)
    cost -= (ira_memory_move_cost[mode][rclass][0]
             * ira_loop_edge_freq (loop_node, regno, true)
             + ira_memory_move_cost[mode][rclass][1]
             * ira_loop_edge_freq (loop_node, regno, false));
  else
    cost += ((ira_memory_move_cost[mode][rclass][1]
              * ira_loop_edge_freq (loop_node, regno, true)
              + ira_memory_move_cost[mode][rclass][0]
              * ira_loop_edge_freq (loop_node, regno, false))
             - (ira_register_move_cost[mode][rclass][rclass]
                * (ira_loop_edge_freq (loop_node, regno, false)
                   + ira_loop_edge_freq (loop_node, regno, true))));
  return cost;
}

/* Compare keys in the splay tree used to choose best allocno for
   spilling.  The best allocno has the minimal key.  */
static int
allocno_spill_priority_compare (splay_tree_key k1, splay_tree_key k2)
{
  int pri1, pri2, diff;
  ira_allocno_t a1 = (ira_allocno_t) k1, a2 = (ira_allocno_t) k2;

  /* Priority = spill cost divided by how much pressure the allocno
     relieves; the "+ 1" keeps the divisor nonzero.  */
  pri1 = (IRA_ALLOCNO_TEMP (a1)
          / (ALLOCNO_LEFT_CONFLICTS_NUM (a1)
             * ira_reg_class_nregs[ALLOCNO_COVER_CLASS (a1)][ALLOCNO_MODE (a1)]
             + 1));
  pri2 = (IRA_ALLOCNO_TEMP (a2)
          / (ALLOCNO_LEFT_CONFLICTS_NUM (a2)
             * ira_reg_class_nregs[ALLOCNO_COVER_CLASS (a2)][ALLOCNO_MODE (a2)]
             + 1));
  if ((diff = pri1 - pri2) != 0)
    return diff;
  /* Tie-break on raw cost, then on allocno number so the order is
     fully deterministic.  */
  if ((diff = IRA_ALLOCNO_TEMP (a1) - IRA_ALLOCNO_TEMP (a2)) != 0)
    return diff;
  return ALLOCNO_NUM (a1) - ALLOCNO_NUM (a2);
}

/* Allocate data of SIZE for the splay trees.  We allocate only spay
   tree roots or splay tree nodes.  If you change this, please rewrite
   the function.  */
static void *
splay_tree_allocate (int size, void *data ATTRIBUTE_UNUSED)
{
  /* Nodes come from the dedicated pool; anything of a different size
     must be a tree root.  */
  if (size != sizeof (struct splay_tree_node_s))
    return ira_allocate (size);
  return pool_alloc (splay_tree_node_pool);
}

/* Free data NODE for the splay trees.  We allocate and free only spay
   tree roots or splay tree nodes.  If you change this, please rewrite
   the function.  */
static void
splay_tree_free (void *node, void *data ATTRIBUTE_UNUSED)
{
  int i;
  enum reg_class cover_class;

  /* A pointer matching one of the registered roots is freed as a
     root; everything else must be a pooled node.  */
  for (i = 0; i < ira_reg_class_cover_size; i++)
    {
      cover_class = ira_reg_class_cover[i];
      if (node == uncolorable_allocnos_splay_tree[cover_class])
        {
          ira_free (node);
          return;
        }
    }
  pool_free (splay_tree_node_pool, node);
}

/* Push allocnos to the coloring stack.  The order of allocnos in the
   stack defines the order for the subsequent coloring.
*/ +static void +push_allocnos_to_stack (void) +{ + ira_allocno_t allocno, a, i_allocno, *allocno_vec; + enum reg_class cover_class, rclass; + int allocno_pri, i_allocno_pri, allocno_cost, i_allocno_cost; + int i, j, num, cover_class_allocnos_num[N_REG_CLASSES]; + ira_allocno_t *cover_class_allocnos[N_REG_CLASSES]; + int cost; + + /* Initialize. */ + for (i = 0; i < ira_reg_class_cover_size; i++) + { + cover_class = ira_reg_class_cover[i]; + cover_class_allocnos_num[cover_class] = 0; + cover_class_allocnos[cover_class] = NULL; + uncolorable_allocnos_splay_tree[cover_class] = NULL; + } + /* Calculate uncolorable allocno spill costs. */ + for (allocno = uncolorable_allocno_bucket; + allocno != NULL; + allocno = ALLOCNO_NEXT_BUCKET_ALLOCNO (allocno)) + if ((cover_class = ALLOCNO_COVER_CLASS (allocno)) != NO_REGS) + { + cover_class_allocnos_num[cover_class]++; + cost = 0; + for (a = ALLOCNO_NEXT_COALESCED_ALLOCNO (allocno);; + a = ALLOCNO_NEXT_COALESCED_ALLOCNO (a)) + { + cost += calculate_allocno_spill_cost (a); + if (a == allocno) + break; + } + /* ??? Remove cost of copies between the coalesced + allocnos. */ + IRA_ALLOCNO_TEMP (allocno) = cost; + } + /* Define place where to put uncolorable allocnos of the same cover + class. */ + for (num = i = 0; i < ira_reg_class_cover_size; i++) + { + cover_class = ira_reg_class_cover[i]; + ira_assert (cover_class_allocnos_num[cover_class] + == uncolorable_allocnos_num[cover_class]); + if (cover_class_allocnos_num[cover_class] != 0) + { + cover_class_allocnos[cover_class] = allocnos_for_spilling + num; + num += cover_class_allocnos_num[cover_class]; + cover_class_allocnos_num[cover_class] = 0; + } + if (USE_SPLAY_P (cover_class)) + uncolorable_allocnos_splay_tree[cover_class] + = splay_tree_new_with_allocator (allocno_spill_priority_compare, + NULL, NULL, splay_tree_allocate, + splay_tree_free, NULL); + } + ira_assert (num <= ira_allocnos_num); + /* Collect uncolorable allocnos of each cover class. 
*/ + for (allocno = uncolorable_allocno_bucket; + allocno != NULL; + allocno = ALLOCNO_NEXT_BUCKET_ALLOCNO (allocno)) + if ((cover_class = ALLOCNO_COVER_CLASS (allocno)) != NO_REGS) + { + cover_class_allocnos + [cover_class][cover_class_allocnos_num[cover_class]++] = allocno; + if (uncolorable_allocnos_splay_tree[cover_class] != NULL) + splay_tree_insert (uncolorable_allocnos_splay_tree[cover_class], + (splay_tree_key) allocno, + (splay_tree_value) allocno); + } + for (;;) + { + push_only_colorable (); + allocno = uncolorable_allocno_bucket; + if (allocno == NULL) + break; + cover_class = ALLOCNO_COVER_CLASS (allocno); + if (cover_class == NO_REGS) + { + push_ira_allocno_to_spill (allocno); + continue; + } + /* Potential spilling. */ + ira_assert + (ira_reg_class_nregs[cover_class][ALLOCNO_MODE (allocno)] > 0); + if (USE_SPLAY_P (cover_class)) + { + for (;VEC_length (ira_allocno_t, removed_splay_allocno_vec) != 0;) + { + allocno = VEC_pop (ira_allocno_t, removed_splay_allocno_vec); + ALLOCNO_SPLAY_REMOVED_P (allocno) = false; + rclass = ALLOCNO_COVER_CLASS (allocno); + if (ALLOCNO_LEFT_CONFLICTS_NUM (allocno) + + ira_reg_class_nregs [rclass][ALLOCNO_MODE (allocno)] + > ALLOCNO_AVAILABLE_REGS_NUM (allocno)) + splay_tree_insert + (uncolorable_allocnos_splay_tree[rclass], + (splay_tree_key) allocno, (splay_tree_value) allocno); + } + allocno = ((ira_allocno_t) + splay_tree_min + (uncolorable_allocnos_splay_tree[cover_class])->key); + splay_tree_remove (uncolorable_allocnos_splay_tree[cover_class], + (splay_tree_key) allocno); + } + else + { + num = cover_class_allocnos_num[cover_class]; + ira_assert (num > 0); + allocno_vec = cover_class_allocnos[cover_class]; + allocno = NULL; + allocno_pri = allocno_cost = 0; + /* Sort uncolorable allocno to find the one with the lowest + spill cost. */ + for (i = 0, j = num - 1; i <= j;) + { + i_allocno = allocno_vec[i]; + if (! 
ALLOCNO_IN_GRAPH_P (i_allocno) + && ALLOCNO_IN_GRAPH_P (allocno_vec[j])) + { + i_allocno = allocno_vec[j]; + allocno_vec[j] = allocno_vec[i]; + allocno_vec[i] = i_allocno; + } + if (ALLOCNO_IN_GRAPH_P (i_allocno)) + { + i++; + if (IRA_ALLOCNO_TEMP (i_allocno) == INT_MAX) + { + ira_allocno_t a; + int cost = 0; + + for (a = ALLOCNO_NEXT_COALESCED_ALLOCNO (i_allocno);; + a = ALLOCNO_NEXT_COALESCED_ALLOCNO (a)) + { + cost += calculate_allocno_spill_cost (i_allocno); + if (a == i_allocno) + break; + } + /* ??? Remove cost of copies between the coalesced + allocnos. */ + IRA_ALLOCNO_TEMP (i_allocno) = cost; + } + i_allocno_cost = IRA_ALLOCNO_TEMP (i_allocno); + i_allocno_pri + = (i_allocno_cost + / (ALLOCNO_LEFT_CONFLICTS_NUM (i_allocno) + * ira_reg_class_nregs[ALLOCNO_COVER_CLASS + (i_allocno)] + [ALLOCNO_MODE (i_allocno)] + 1)); + if (allocno == NULL || allocno_pri > i_allocno_pri + || (allocno_pri == i_allocno_pri + && (allocno_cost > i_allocno_cost + || (allocno_cost == i_allocno_cost + && (ALLOCNO_NUM (allocno) + > ALLOCNO_NUM (i_allocno)))))) + { + allocno = i_allocno; + allocno_cost = i_allocno_cost; + allocno_pri = i_allocno_pri; + } + } + if (! 
ALLOCNO_IN_GRAPH_P (allocno_vec[j])) + j--; + } + ira_assert (allocno != NULL && j >= 0); + cover_class_allocnos_num[cover_class] = j + 1; + } + ira_assert (ALLOCNO_IN_GRAPH_P (allocno) + && ALLOCNO_COVER_CLASS (allocno) == cover_class + && (ALLOCNO_LEFT_CONFLICTS_NUM (allocno) + + ira_reg_class_nregs[cover_class][ALLOCNO_MODE + (allocno)] + > ALLOCNO_AVAILABLE_REGS_NUM (allocno))); + remove_allocno_from_bucket_and_push (allocno, false); + } + ira_assert (colorable_allocno_bucket == NULL + && uncolorable_allocno_bucket == NULL); + for (i = 0; i < ira_reg_class_cover_size; i++) + { + cover_class = ira_reg_class_cover[i]; + ira_assert (uncolorable_allocnos_num[cover_class] == 0); + if (uncolorable_allocnos_splay_tree[cover_class] != NULL) + splay_tree_delete (uncolorable_allocnos_splay_tree[cover_class]); + } +} + +/* Pop the coloring stack and assign hard registers to the popped + allocnos. */ +static void +pop_allocnos_from_stack (void) +{ + ira_allocno_t allocno; + enum reg_class cover_class; + + for (;VEC_length (ira_allocno_t, allocno_stack_vec) != 0;) + { + allocno = VEC_pop (ira_allocno_t, allocno_stack_vec); + cover_class = ALLOCNO_COVER_CLASS (allocno); + if (internal_flag_ira_verbose > 3 && ira_dump_file != NULL) + { + fprintf (ira_dump_file, " Popping"); + print_coalesced_allocno (allocno); + fprintf (ira_dump_file, " -- "); + } + if (cover_class == NO_REGS) + { + ALLOCNO_HARD_REGNO (allocno) = -1; + ALLOCNO_ASSIGNED_P (allocno) = true; + ira_assert (ALLOCNO_UPDATED_HARD_REG_COSTS (allocno) == NULL); + ira_assert + (ALLOCNO_UPDATED_CONFLICT_HARD_REG_COSTS (allocno) == NULL); + if (internal_flag_ira_verbose > 3 && ira_dump_file != NULL) + fprintf (ira_dump_file, "assign memory\n"); + } + else if (assign_hard_reg (allocno, false)) + { + if (internal_flag_ira_verbose > 3 && ira_dump_file != NULL) + fprintf (ira_dump_file, "assign reg %d\n", + ALLOCNO_HARD_REGNO (allocno)); + } + else if (ALLOCNO_ASSIGNED_P (allocno)) + { + if (internal_flag_ira_verbose > 3 && 
ira_dump_file != NULL) + fprintf (ira_dump_file, "spill\n"); + } + ALLOCNO_IN_GRAPH_P (allocno) = true; + } +} + +/* Set up number of available hard registers for ALLOCNO. */ +static void +setup_allocno_available_regs_num (ira_allocno_t allocno) +{ + int i, n, hard_regs_num; + enum reg_class cover_class; + ira_allocno_t a; + HARD_REG_SET temp_set; + + cover_class = ALLOCNO_COVER_CLASS (allocno); + ALLOCNO_AVAILABLE_REGS_NUM (allocno) = ira_available_class_regs[cover_class]; + if (cover_class == NO_REGS) + return; + CLEAR_HARD_REG_SET (temp_set); + ira_assert (ALLOCNO_FIRST_COALESCED_ALLOCNO (allocno) == allocno); + hard_regs_num = ira_class_hard_regs_num[cover_class]; + for (a = ALLOCNO_NEXT_COALESCED_ALLOCNO (allocno);; + a = ALLOCNO_NEXT_COALESCED_ALLOCNO (a)) + { + IOR_HARD_REG_SET (temp_set, ALLOCNO_TOTAL_CONFLICT_HARD_REGS (a)); + if (a == allocno) + break; + } + for (n = 0, i = hard_regs_num - 1; i >= 0; i--) + if (TEST_HARD_REG_BIT (temp_set, ira_class_hard_regs[cover_class][i])) + n++; + if (internal_flag_ira_verbose > 2 && n > 0 && ira_dump_file != NULL) + fprintf (ira_dump_file, " Reg %d of %s has %d regs less\n", + ALLOCNO_REGNO (allocno), reg_class_names[cover_class], n); + ALLOCNO_AVAILABLE_REGS_NUM (allocno) -= n; +} + +/* Set up ALLOCNO_LEFT_CONFLICTS_NUM for ALLOCNO. 
*/
static void
setup_allocno_left_conflicts_num (ira_allocno_t allocno)
{
  int i, hard_regs_num, hard_regno, conflict_allocnos_size;
  ira_allocno_t a, conflict_allocno;
  enum reg_class cover_class;
  HARD_REG_SET temp_set;
  ira_allocno_conflict_iterator aci;

  cover_class = ALLOCNO_COVER_CLASS (allocno);
  hard_regs_num = ira_class_hard_regs_num[cover_class];
  CLEAR_HARD_REG_SET (temp_set);
  ira_assert (ALLOCNO_FIRST_COALESCED_ALLOCNO (allocno) == allocno);
  /* Union the hard registers conflicting with any member of the
     coalesced set.  */
  for (a = ALLOCNO_NEXT_COALESCED_ALLOCNO (allocno);;
       a = ALLOCNO_NEXT_COALESCED_ALLOCNO (a))
    {
      IOR_HARD_REG_SET (temp_set, ALLOCNO_TOTAL_CONFLICT_HARD_REGS (a));
      if (a == allocno)
        break;
    }
  /* Keep only allocatable registers of the cover class.  */
  AND_HARD_REG_SET (temp_set, reg_class_contents[cover_class]);
  AND_COMPL_HARD_REG_SET (temp_set, ira_no_alloc_regs);
  conflict_allocnos_size = 0;
  /* Each conflicting hard register counts as one conflict.  */
  if (! hard_reg_set_equal_p (temp_set, ira_zero_hard_reg_set))
    for (i = 0; i < (int) hard_regs_num; i++)
      {
        hard_regno = ira_class_hard_regs[cover_class][i];
        if (TEST_HARD_REG_BIT (temp_set, hard_regno))
          {
            conflict_allocnos_size++;
            CLEAR_HARD_REG_BIT (temp_set, hard_regno);
            /* Stop early once the set is exhausted.  */
            if (hard_reg_set_equal_p (temp_set, ira_zero_hard_reg_set))
              break;
          }
      }
  CLEAR_HARD_REG_SET (temp_set);
  if (allocno_coalesced_p)
    bitmap_clear (processed_coalesced_allocno_bitmap);
  /* Add conflicts contributed by still-relevant conflicting allocnos:
     unassigned ones count by the registers they will need, assigned
     ones by the concrete registers they occupy (counted once via
     TEMP_SET).  */
  if (cover_class != NO_REGS)
    for (a = ALLOCNO_NEXT_COALESCED_ALLOCNO (allocno);;
         a = ALLOCNO_NEXT_COALESCED_ALLOCNO (a))
      {
        FOR_EACH_ALLOCNO_CONFLICT (a, conflict_allocno, aci)
          if (bitmap_bit_p (consideration_allocno_bitmap,
                            ALLOCNO_NUM (conflict_allocno)))
            {
              ira_assert (cover_class
                          == ALLOCNO_COVER_CLASS (conflict_allocno));
              if (allocno_coalesced_p)
                {
                  /* Count each conflicting coalesced allocno once.  */
                  if (bitmap_bit_p (processed_coalesced_allocno_bitmap,
                                    ALLOCNO_NUM (conflict_allocno)))
                    continue;
                  bitmap_set_bit (processed_coalesced_allocno_bitmap,
                                  ALLOCNO_NUM (conflict_allocno));
                }
              if (! ALLOCNO_ASSIGNED_P (conflict_allocno))
                conflict_allocnos_size
                  += (ira_reg_class_nregs
                      [cover_class][ALLOCNO_MODE (conflict_allocno)]);
              else if ((hard_regno = ALLOCNO_HARD_REGNO (conflict_allocno))
                       >= 0)
                {
                  int last = (hard_regno
                              + hard_regno_nregs
                                [hard_regno][ALLOCNO_MODE (conflict_allocno)]);

                  while (hard_regno < last)
                    {
                      if (! TEST_HARD_REG_BIT (temp_set, hard_regno))
                        {
                          conflict_allocnos_size++;
                          SET_HARD_REG_BIT (temp_set, hard_regno);
                        }
                      hard_regno++;
                    }
                }
            }
        if (a == allocno)
          break;
      }
  ALLOCNO_LEFT_CONFLICTS_NUM (allocno) = conflict_allocnos_size;
}

/* Put ALLOCNO in a bucket corresponding to its number and size of its
   conflicting allocnos and hard registers.  */
static void
put_allocno_into_bucket (ira_allocno_t allocno)
{
  int hard_regs_num;
  enum reg_class cover_class;

  cover_class = ALLOCNO_COVER_CLASS (allocno);
  hard_regs_num = ira_class_hard_regs_num[cover_class];
  /* Only the representative of a coalesced set goes into a bucket.  */
  if (ALLOCNO_FIRST_COALESCED_ALLOCNO (allocno) != allocno)
    return;
  ALLOCNO_IN_GRAPH_P (allocno) = true;
  setup_allocno_left_conflicts_num (allocno);
  setup_allocno_available_regs_num (allocno);
  /* Trivially colorable allocnos go to the colorable bucket.  */
  if (ALLOCNO_LEFT_CONFLICTS_NUM (allocno)
      + ira_reg_class_nregs[cover_class][ALLOCNO_MODE (allocno)]
      <= ALLOCNO_AVAILABLE_REGS_NUM (allocno))
    add_ira_allocno_to_bucket (allocno, &colorable_allocno_bucket);
  else
    add_ira_allocno_to_bucket (allocno, &uncolorable_allocno_bucket);
}

/* The function is used to sort copies according to their execution
   frequencies (most frequent first).  */
static int
copy_freq_compare_func (const void *v1p, const void *v2p)
{
  ira_copy_t cp1 = *(const ira_copy_t *) v1p, cp2 = *(const ira_copy_t *) v2p;
  int pri1, pri2;

  pri1 = cp1->freq;
  pri2 = cp2->freq;
  if (pri2 - pri1)
    return pri2 - pri1;

  /* If freqencies are equal, sort by copies, so that the results of
     qsort leave nothing to chance.  */
  return cp1->num - cp2->num;
}

/* Merge two sets of coalesced allocnos given correspondingly by
   allocnos A1 and A2 (more accurately merging A2 set into A1
   set).  */
static void
merge_allocnos (ira_allocno_t a1, ira_allocno_t a2)
{
  ira_allocno_t a, first, last, next;

  first = ALLOCNO_FIRST_COALESCED_ALLOCNO (a1);
  /* Already in the same set -- nothing to do.  */
  if (first == ALLOCNO_FIRST_COALESCED_ALLOCNO (a2))
    return;
  /* Retarget every member of A2's ring to FIRST.  */
  for (last = a2, a = ALLOCNO_NEXT_COALESCED_ALLOCNO (a2);;
       a = ALLOCNO_NEXT_COALESCED_ALLOCNO (a))
    {
      ALLOCNO_FIRST_COALESCED_ALLOCNO (a) = first;
      if (a == a2)
        break;
      last = a;
    }
  /* Splice A2's circular list into A1's after FIRST.  */
  next = ALLOCNO_NEXT_COALESCED_ALLOCNO (first);
  ALLOCNO_NEXT_COALESCED_ALLOCNO (first) = a2;
  ALLOCNO_NEXT_COALESCED_ALLOCNO (last) = next;
}

/* Return TRUE if there are conflicting allocnos from two sets of
   coalesced allocnos given correspondingly by allocnos A1 and A2.  If
   RELOAD_P is TRUE, we use live ranges to find conflicts because
   conflicts are represented only for allocnos of the same cover class
   and during the reload pass we coalesce allocnos for sharing stack
   memory slots.
*/
static bool
coalesced_allocno_conflict_p (ira_allocno_t a1, ira_allocno_t a2,
                              bool reload_p)
{
  ira_allocno_t a, conflict_allocno;
  ira_allocno_conflict_iterator aci;

  if (allocno_coalesced_p)
    {
      /* Record A1's coalesced set so membership can be tested by
         bitmap below.  */
      bitmap_clear (processed_coalesced_allocno_bitmap);
      for (a = ALLOCNO_NEXT_COALESCED_ALLOCNO (a1);;
           a = ALLOCNO_NEXT_COALESCED_ALLOCNO (a))
        {
          bitmap_set_bit (processed_coalesced_allocno_bitmap, ALLOCNO_NUM (a));
          if (a == a1)
            break;
        }
    }
  for (a = ALLOCNO_NEXT_COALESCED_ALLOCNO (a2);;
       a = ALLOCNO_NEXT_COALESCED_ALLOCNO (a))
    {
      if (reload_p)
        {
          /* During reload conflicts are not recorded across cover
             classes, so test live-range intersection pairwise.  */
          for (conflict_allocno = ALLOCNO_NEXT_COALESCED_ALLOCNO (a1);;
               conflict_allocno
                 = ALLOCNO_NEXT_COALESCED_ALLOCNO (conflict_allocno))
            {
              if (ira_allocno_live_ranges_intersect_p (a, conflict_allocno))
                return true;
              if (conflict_allocno == a1)
                break;
            }
        }
      else
        {
          FOR_EACH_ALLOCNO_CONFLICT (a, conflict_allocno, aci)
            if (conflict_allocno == a1
                || (allocno_coalesced_p
                    && bitmap_bit_p (processed_coalesced_allocno_bitmap,
                                     ALLOCNO_NUM (conflict_allocno))))
              return true;
        }
      if (a == a2)
        break;
    }
  return false;
}

/* The major function for aggressive allocno coalescing.  For the
   reload pass (RELOAD_P) we coalesce only spilled allocnos.  If some
   allocnos have been coalesced, we set up flag
   allocno_coalesced_p.  */
static void
coalesce_allocnos (bool reload_p)
{
  ira_allocno_t a;
  ira_copy_t cp, next_cp, *sorted_copies;
  enum reg_class cover_class;
  enum machine_mode mode;
  unsigned int j;
  int i, n, cp_num, regno;
  bitmap_iterator bi;

  sorted_copies = (ira_copy_t *) ira_allocate (ira_copies_num
                                               * sizeof (ira_copy_t));
  cp_num = 0;
  /* Collect copies.  */
  EXECUTE_IF_SET_IN_BITMAP (coloring_allocno_bitmap, 0, j, bi)
    {
      a = ira_allocnos[j];
      regno = ALLOCNO_REGNO (a);
      /* Before reload skip already-assigned allocnos; during reload
         only spilled allocnos without a register equivalence are
         candidates.  */
      if ((! reload_p && ALLOCNO_ASSIGNED_P (a))
          || (reload_p
              && (! ALLOCNO_ASSIGNED_P (a) || ALLOCNO_HARD_REGNO (a) >= 0
                  || (regno < ira_reg_equiv_len
                      && (ira_reg_equiv_const[regno] != NULL_RTX
                          || ira_reg_equiv_invariant_p[regno])))))
        continue;
      cover_class = ALLOCNO_COVER_CLASS (a);
      mode = ALLOCNO_MODE (a);
      for (cp = ALLOCNO_COPIES (a); cp != NULL; cp = next_cp)
        {
          if (cp->first == a)
            {
              next_cp = cp->next_first_allocno_copy;
              regno = ALLOCNO_REGNO (cp->second);
              if ((reload_p
                   || (ALLOCNO_COVER_CLASS (cp->second) == cover_class
                       && ALLOCNO_MODE (cp->second) == mode))
                  && cp->insn != NULL
                  && ((! reload_p && ! ALLOCNO_ASSIGNED_P (cp->second))
                      || (reload_p
                          && ALLOCNO_ASSIGNED_P (cp->second)
                          && ALLOCNO_HARD_REGNO (cp->second) < 0
                          && (regno >= ira_reg_equiv_len
                              || (! ira_reg_equiv_invariant_p[regno]
                                  && ira_reg_equiv_const[regno]
                                     == NULL_RTX)))))
                sorted_copies[cp_num++] = cp;
            }
          else if (cp->second == a)
            /* Visit each copy once, from its first allocno.  */
            next_cp = cp->next_second_allocno_copy;
          else
            gcc_unreachable ();
        }
    }
  qsort (sorted_copies, cp_num, sizeof (ira_copy_t), copy_freq_compare_func);
  /* Coalesced copies, most frequently executed first.  */
  for (; cp_num != 0;)
    {
      for (i = 0; i < cp_num; i++)
        {
          cp = sorted_copies[i];
          if (! coalesced_allocno_conflict_p (cp->first, cp->second, reload_p))
            {
              allocno_coalesced_p = true;
              if (internal_flag_ira_verbose > 3 && ira_dump_file != NULL)
                fprintf
                  (ira_dump_file,
                   " Coalescing copy %d:a%dr%d-a%dr%d (freq=%d)\n",
                   cp->num, ALLOCNO_NUM (cp->first), ALLOCNO_REGNO (cp->first),
                   ALLOCNO_NUM (cp->second), ALLOCNO_REGNO (cp->second),
                   cp->freq);
              merge_allocnos (cp->first, cp->second);
              i++;
              break;
            }
        }
      /* Collect the rest of copies.  Copies whose endpoints now
         belong to the same coalesced set are dropped.  */
      for (n = 0; i < cp_num; i++)
        {
          cp = sorted_copies[i];
          if (ALLOCNO_FIRST_COALESCED_ALLOCNO (cp->first)
              != ALLOCNO_FIRST_COALESCED_ALLOCNO (cp->second))
            sorted_copies[n++] = cp;
        }
      cp_num = n;
    }
  ira_free (sorted_copies);
}

/* Chaitin-Briggs coloring for allocnos in COLORING_ALLOCNO_BITMAP
   taking into account allocnos in CONSIDERATION_ALLOCNO_BITMAP.  */
static void
color_allocnos (void)
{
  unsigned int i;
  bitmap_iterator bi;
  ira_allocno_t a;

  allocno_coalesced_p = false;
  processed_coalesced_allocno_bitmap = ira_allocate_bitmap ();
  if (flag_ira_coalesce)
    coalesce_allocnos (false);
  /* Put the allocnos into the corresponding buckets.  */
  colorable_allocno_bucket = NULL;
  uncolorable_allocno_bucket = NULL;
  EXECUTE_IF_SET_IN_BITMAP (coloring_allocno_bitmap, 0, i, bi)
    {
      a = ira_allocnos[i];
      if (ALLOCNO_COVER_CLASS (a) == NO_REGS)
        {
          /* No usable register class: spill immediately.  */
          ALLOCNO_HARD_REGNO (a) = -1;
          ALLOCNO_ASSIGNED_P (a) = true;
          ira_assert (ALLOCNO_UPDATED_HARD_REG_COSTS (a) == NULL);
          ira_assert (ALLOCNO_UPDATED_CONFLICT_HARD_REG_COSTS (a) == NULL);
          if (internal_flag_ira_verbose > 3 && ira_dump_file != NULL)
            {
              fprintf (ira_dump_file, " Spill");
              print_coalesced_allocno (a);
              fprintf (ira_dump_file, "\n");
            }
          continue;
        }
      put_allocno_into_bucket (a);
    }
  push_allocnos_to_stack ();
  pop_allocnos_from_stack ();
  if (flag_ira_coalesce)
    /* We don't need coalesced allocnos for ira_reassign_pseudos.  */
    EXECUTE_IF_SET_IN_BITMAP (coloring_allocno_bitmap, 0, i, bi)
      {
        a = ira_allocnos[i];
        ALLOCNO_FIRST_COALESCED_ALLOCNO (a) = a;
        ALLOCNO_NEXT_COALESCED_ALLOCNO (a) = a;
      }
  ira_free_bitmap (processed_coalesced_allocno_bitmap);
  allocno_coalesced_p = false;
}



/* Output information about the loop given by its LOOP_TREE_NODE.
*/
static void
print_loop_title (ira_loop_tree_node_t loop_tree_node)
{
  unsigned int j;
  bitmap_iterator bi;

  ira_assert (loop_tree_node->loop != NULL);
  fprintf (ira_dump_file,
           "\n Loop %d (parent %d, header bb%d, depth %d)\n ref:",
           loop_tree_node->loop->num,
           (loop_tree_node->parent == NULL
            ? -1 : loop_tree_node->parent->loop->num),
           loop_tree_node->loop->header->index,
           loop_depth (loop_tree_node->loop));
  EXECUTE_IF_SET_IN_BITMAP (loop_tree_node->mentioned_allocnos, 0, j, bi)
    fprintf (ira_dump_file, " %dr%d", j, ALLOCNO_REGNO (ira_allocnos[j]));
  fprintf (ira_dump_file, "\n modified regnos:");
  EXECUTE_IF_SET_IN_BITMAP (loop_tree_node->modified_regnos, 0, j, bi)
    fprintf (ira_dump_file, " %d", j);
  fprintf (ira_dump_file, "\n border:");
  EXECUTE_IF_SET_IN_BITMAP (loop_tree_node->border_allocnos, 0, j, bi)
    fprintf (ira_dump_file, " %dr%d", j, ALLOCNO_REGNO (ira_allocnos[j]));
  fprintf (ira_dump_file, "\n Pressure:");
  for (j = 0; (int) j < ira_reg_class_cover_size; j++)
    {
      enum reg_class cover_class;

      cover_class = ira_reg_class_cover[j];
      if (loop_tree_node->reg_pressure[cover_class] == 0)
        continue;
      fprintf (ira_dump_file, " %s=%d", reg_class_names[cover_class],
               loop_tree_node->reg_pressure[cover_class]);
    }
  fprintf (ira_dump_file, "\n");
}

/* Color the allocnos inside loop (in the extreme case it can be all
   of the function) given the corresponding LOOP_TREE_NODE.  The
   function is called for each loop during top-down traverse of the
   loop tree.  */
static void
color_pass (ira_loop_tree_node_t loop_tree_node)
{
  int regno, hard_regno, index = -1;
  int cost, exit_freq, enter_freq;
  unsigned int j;
  bitmap_iterator bi;
  enum machine_mode mode;
  enum reg_class rclass, cover_class;
  ira_allocno_t a, subloop_allocno;
  ira_loop_tree_node_t subloop_node;

  ira_assert (loop_tree_node->bb == NULL);
  if (internal_flag_ira_verbose > 1 && ira_dump_file != NULL)
    print_loop_title (loop_tree_node);

  /* Color mentioned + border allocnos; already-assigned ones are only
     "considered" (taken into account as fixed).  */
  bitmap_copy (coloring_allocno_bitmap, loop_tree_node->mentioned_allocnos);
  bitmap_ior_into (coloring_allocno_bitmap, loop_tree_node->border_allocnos);
  bitmap_copy (consideration_allocno_bitmap, coloring_allocno_bitmap);
  EXECUTE_IF_SET_IN_BITMAP (consideration_allocno_bitmap, 0, j, bi)
    {
      a = ira_allocnos[j];
      if (! ALLOCNO_ASSIGNED_P (a))
        continue;
      bitmap_clear_bit (coloring_allocno_bitmap, ALLOCNO_NUM (a));
    }
  /* Color all mentioned allocnos including transparent ones.  */
  color_allocnos ();
  /* Process caps.  They are processed just once.  */
  if (flag_ira_algorithm == IRA_ALGORITHM_MIXED
      || flag_ira_algorithm == IRA_ALGORITHM_REGIONAL)
    EXECUTE_IF_SET_IN_BITMAP (loop_tree_node->mentioned_allocnos, 0, j, bi)
      {
        a = ira_allocnos[j];
        if (ALLOCNO_CAP_MEMBER (a) == NULL)
          continue;
        /* Remove from processing in the next loop.  */
        bitmap_clear_bit (consideration_allocno_bitmap, j);
        rclass = ALLOCNO_COVER_CLASS (a);
        /* With low register pressure in the subloop, just propagate
           the cap's assignment down to its member.  */
        if ((flag_ira_algorithm == IRA_ALGORITHM_MIXED
             && loop_tree_node->reg_pressure[rclass]
             <= ira_available_class_regs[rclass]))
          {
            mode = ALLOCNO_MODE (a);
            hard_regno = ALLOCNO_HARD_REGNO (a);
            if (hard_regno >= 0)
              {
                index = ira_class_hard_reg_index[rclass][hard_regno];
                ira_assert (index >= 0);
              }
            regno = ALLOCNO_REGNO (a);
            subloop_allocno = ALLOCNO_CAP_MEMBER (a);
            subloop_node = ALLOCNO_LOOP_TREE_NODE (subloop_allocno);
            ira_assert (!ALLOCNO_ASSIGNED_P (subloop_allocno));
            ALLOCNO_HARD_REGNO (subloop_allocno) = hard_regno;
            ALLOCNO_ASSIGNED_P (subloop_allocno) = true;
            if (hard_regno >= 0)
              update_copy_costs (subloop_allocno, true);
            /* We don't need updated costs anymore: */
            ira_free_allocno_updated_costs (subloop_allocno);
          }
      }
  /* Update costs of the corresponding allocnos (not caps) in the
     subloops.  */
  for (subloop_node = loop_tree_node->subloops;
       subloop_node != NULL;
       subloop_node = subloop_node->subloop_next)
    {
      ira_assert (subloop_node->bb == NULL);
      EXECUTE_IF_SET_IN_BITMAP (consideration_allocno_bitmap, 0, j, bi)
        {
          a = ira_allocnos[j];
          ira_assert (ALLOCNO_CAP_MEMBER (a) == NULL);
          mode = ALLOCNO_MODE (a);
          rclass = ALLOCNO_COVER_CLASS (a);
          hard_regno = ALLOCNO_HARD_REGNO (a);
          if (hard_regno >= 0)
            {
              index = ira_class_hard_reg_index[rclass][hard_regno];
              ira_assert (index >= 0);
            }
          regno = ALLOCNO_REGNO (a);
          /* ??? conflict costs */
          subloop_allocno = subloop_node->regno_allocno_map[regno];
          if (subloop_allocno == NULL
              || ALLOCNO_CAP (subloop_allocno) != NULL)
            continue;
          /* Propagate the assignment directly when pressure is low or
             when an unassigned (memory) allocno is not mentioned in
             the subloop at all.  */
          if ((flag_ira_algorithm == IRA_ALGORITHM_MIXED
               && (loop_tree_node->reg_pressure[rclass]
                   <= ira_available_class_regs[rclass]))
              || (hard_regno < 0
                  && ! bitmap_bit_p (subloop_node->mentioned_allocnos,
                                     ALLOCNO_NUM (subloop_allocno))))
            {
              if (! ALLOCNO_ASSIGNED_P (subloop_allocno))
                {
                  ALLOCNO_HARD_REGNO (subloop_allocno) = hard_regno;
                  ALLOCNO_ASSIGNED_P (subloop_allocno) = true;
                  if (hard_regno >= 0)
                    update_copy_costs (subloop_allocno, true);
                  /* We don't need updated costs anymore: */
                  ira_free_allocno_updated_costs (subloop_allocno);
                }
              continue;
            }
          exit_freq = ira_loop_edge_freq (subloop_node, regno, true);
          enter_freq = ira_loop_edge_freq (subloop_node, regno, false);
          ira_assert (regno < ira_reg_equiv_len);
          if (ira_reg_equiv_invariant_p[regno]
              || ira_reg_equiv_const[regno] != NULL_RTX)
            {
              /* Equivalent pseudos are rematerializable; propagate the
                 assignment instead of biasing costs.  */
              if (! ALLOCNO_ASSIGNED_P (subloop_allocno))
                {
                  ALLOCNO_HARD_REGNO (subloop_allocno) = hard_regno;
                  ALLOCNO_ASSIGNED_P (subloop_allocno) = true;
                  if (hard_regno >= 0)
                    update_copy_costs (subloop_allocno, true);
                  /* We don't need updated costs anymore: */
                  ira_free_allocno_updated_costs (subloop_allocno);
                }
            }
          else if (hard_regno < 0)
            {
              /* Parent is in memory: make memory cheaper in the
                 subloop by the saved edge loads/stores.  */
              ALLOCNO_UPDATED_MEMORY_COST (subloop_allocno)
                -= ((ira_memory_move_cost[mode][rclass][1] * enter_freq)
                    + (ira_memory_move_cost[mode][rclass][0] * exit_freq));
            }
          else
            {
              /* Parent got a register: bias the subloop allocno toward
                 the same register and make memory more expensive.  */
              cover_class = ALLOCNO_COVER_CLASS (subloop_allocno);
              ira_allocate_and_set_costs
                (&ALLOCNO_HARD_REG_COSTS (subloop_allocno), cover_class,
                 ALLOCNO_COVER_CLASS_COST (subloop_allocno));
              ira_allocate_and_set_costs
                (&ALLOCNO_CONFLICT_HARD_REG_COSTS (subloop_allocno),
                 cover_class, 0);
              cost = (ira_register_move_cost[mode][rclass][rclass]
                      * (exit_freq + enter_freq));
              ALLOCNO_HARD_REG_COSTS (subloop_allocno)[index] -= cost;
              ALLOCNO_CONFLICT_HARD_REG_COSTS (subloop_allocno)[index]
                -= cost;
              ALLOCNO_UPDATED_MEMORY_COST (subloop_allocno)
                += (ira_memory_move_cost[mode][rclass][0] * enter_freq
                    + ira_memory_move_cost[mode][rclass][1] * exit_freq);
              if (ALLOCNO_COVER_CLASS_COST (subloop_allocno)
                  > ALLOCNO_HARD_REG_COSTS (subloop_allocno)[index])
                ALLOCNO_COVER_CLASS_COST (subloop_allocno)
                  = ALLOCNO_HARD_REG_COSTS (subloop_allocno)[index];
            }
        }
    }
}

/* Initialize the common data for coloring and calls functions to do
   Chaitin-Briggs and regional coloring.  */
static void
do_coloring (void)
{
  coloring_allocno_bitmap = ira_allocate_bitmap ();
  allocnos_for_spilling
    = (ira_allocno_t *) ira_allocate (sizeof (ira_allocno_t)
                                      * ira_allocnos_num);
  splay_tree_node_pool = create_alloc_pool ("splay tree nodes",
                                            sizeof (struct splay_tree_node_s),
                                            100);
  if (internal_flag_ira_verbose > 0 && ira_dump_file != NULL)
    fprintf (ira_dump_file, "\n**** Allocnos coloring:\n\n");

  /* Top-down traversal: color_pass runs on each loop node.  */
  ira_traverse_loop_tree (false, ira_loop_tree_root, color_pass, NULL);

  if (internal_flag_ira_verbose > 1 && ira_dump_file != NULL)
    ira_print_disposition (ira_dump_file);

  free_alloc_pool (splay_tree_node_pool);
  ira_free_bitmap (coloring_allocno_bitmap);
  ira_free (allocnos_for_spilling);
}



/* Move spill/restore code, which are to be generated in ira-emit.c,
   to less frequent points (if it is profitable) by reassigning some
   allocnos (in loop with subloops containing in another loop) to
   memory which results in longer live-range where the corresponding
   pseudo-registers will be in memory.
*/
static void
move_spill_restore (void)
{
  int cost, regno, hard_regno, hard_regno2, index;
  bool changed_p;
  int enter_freq, exit_freq;
  enum machine_mode mode;
  enum reg_class rclass;
  ira_allocno_t a, parent_allocno, subloop_allocno;
  ira_loop_tree_node_t parent, loop_node, subloop_node;
  ira_allocno_iterator ai;

  /* Iterate to a fixed point: each de-assignment can make further
     de-assignments profitable.  */
  for (;;)
    {
      changed_p = false;
      if (internal_flag_ira_verbose > 0 && ira_dump_file != NULL)
        fprintf (ira_dump_file, "New iteration of spill/restore move\n");
      FOR_EACH_ALLOCNO (a, ai)
        {
          regno = ALLOCNO_REGNO (a);
          loop_node = ALLOCNO_LOOP_TREE_NODE (a);
          /* Candidates: register-assigned, non-cap allocnos of loops
             with subloops, live on the region border, and without a
             register equivalence.  */
          if (ALLOCNO_CAP_MEMBER (a) != NULL
              || ALLOCNO_CAP (a) != NULL
              || (hard_regno = ALLOCNO_HARD_REGNO (a)) < 0
              || loop_node->children == NULL
              /* don't do the optimization because it can create
                 copies and the reload pass can spill the allocno set
                 by copy although the allocno will not get memory
                 slot.  */
              || ira_reg_equiv_invariant_p[regno]
              || ira_reg_equiv_const[regno] != NULL_RTX
              || !bitmap_bit_p (loop_node->border_allocnos, ALLOCNO_NUM (a)))
            continue;
          mode = ALLOCNO_MODE (a);
          rclass = ALLOCNO_COVER_CLASS (a);
          index = ira_class_hard_reg_index[rclass][hard_regno];
          ira_assert (index >= 0);
          /* Benefit of keeping the register in this region alone.  */
          cost = (ALLOCNO_MEMORY_COST (a)
                  - (ALLOCNO_HARD_REG_COSTS (a) == NULL
                     ? ALLOCNO_COVER_CLASS_COST (a)
                     : ALLOCNO_HARD_REG_COSTS (a)[index]));
          for (subloop_node = loop_node->subloops;
               subloop_node != NULL;
               subloop_node = subloop_node->subloop_next)
            {
              ira_assert (subloop_node->bb == NULL);
              subloop_allocno = subloop_node->regno_allocno_map[regno];
              if (subloop_allocno == NULL)
                continue;
              /* We have accumulated cost.  To get the real cost of
                 allocno usage in the loop we should subtract costs of
                 the subloop allocnos.  */
              cost -= (ALLOCNO_MEMORY_COST (subloop_allocno)
                       - (ALLOCNO_HARD_REG_COSTS (subloop_allocno) == NULL
                          ? ALLOCNO_COVER_CLASS_COST (subloop_allocno)
                          : ALLOCNO_HARD_REG_COSTS (subloop_allocno)[index]));
              exit_freq = ira_loop_edge_freq (subloop_node, regno, true);
              enter_freq = ira_loop_edge_freq (subloop_node, regno, false);
              /* Account for the moves on subloop boundaries that would
                 appear or disappear if A went to memory.  */
              if ((hard_regno2 = ALLOCNO_HARD_REGNO (subloop_allocno)) < 0)
                cost -= (ira_memory_move_cost[mode][rclass][0] * exit_freq
                         + ira_memory_move_cost[mode][rclass][1] * enter_freq);
              else
                {
                  cost
                    += (ira_memory_move_cost[mode][rclass][0] * exit_freq
                        + ira_memory_move_cost[mode][rclass][1] * enter_freq);
                  if (hard_regno2 != hard_regno)
                    cost -= (ira_register_move_cost[mode][rclass][rclass]
                             * (exit_freq + enter_freq));
                }
            }
          /* The same accounting for the boundary with the parent
             region.  */
          if ((parent = loop_node->parent) != NULL
              && (parent_allocno = parent->regno_allocno_map[regno]) != NULL)
            {
              exit_freq = ira_loop_edge_freq (loop_node, regno, true);
              enter_freq = ira_loop_edge_freq (loop_node, regno, false);
              if ((hard_regno2 = ALLOCNO_HARD_REGNO (parent_allocno)) < 0)
                cost -= (ira_memory_move_cost[mode][rclass][0] * exit_freq
                         + ira_memory_move_cost[mode][rclass][1] * enter_freq);
              else
                {
                  cost
                    += (ira_memory_move_cost[mode][rclass][1] * exit_freq
                        + ira_memory_move_cost[mode][rclass][0] * enter_freq);
                  if (hard_regno2 != hard_regno)
                    cost -= (ira_register_move_cost[mode][rclass][rclass]
                             * (exit_freq + enter_freq));
                }
            }
          if (cost < 0)
            {
              /* Keeping the register here costs more than it saves:
                 put the allocno in memory for this region.  */
              ALLOCNO_HARD_REGNO (a) = -1;
              if (internal_flag_ira_verbose > 3 && ira_dump_file != NULL)
                {
                  fprintf
                    (ira_dump_file,
                     " Moving spill/restore for a%dr%d up from loop %d",
                     ALLOCNO_NUM (a), regno, loop_node->loop->num);
                  fprintf (ira_dump_file, " - profit %d\n", -cost);
                }
              changed_p = true;
            }
        }
      if (! changed_p)
        break;
    }
}



/* Update current hard reg costs and current conflict hard reg costs
   for allocno A.  It is done by processing its copies containing
   other allocnos already assigned.
 */
static void
update_curr_costs (ira_allocno_t a)
{
  int i, hard_regno, cost;
  enum machine_mode mode;
  enum reg_class cover_class, rclass;
  ira_allocno_t another_a;
  ira_copy_t cp, next_cp;

  ira_assert (! ALLOCNO_ASSIGNED_P (a));
  cover_class = ALLOCNO_COVER_CLASS (a);
  if (cover_class == NO_REGS)
    return;
  mode = ALLOCNO_MODE (a);
  /* Walk all copies involving A; for each copy whose other side is
     already assigned a hard register, bias A's updated costs towards
     that register (weighted by the copy frequency).  */
  for (cp = ALLOCNO_COPIES (a); cp != NULL; cp = next_cp)
    {
      if (cp->first == a)
	{
	  next_cp = cp->next_first_allocno_copy;
	  another_a = cp->second;
	}
      else if (cp->second == a)
	{
	  next_cp = cp->next_second_allocno_copy;
	  another_a = cp->first;
	}
      else
	gcc_unreachable ();
      if (cover_class != ALLOCNO_COVER_CLASS (another_a)
	  || ! ALLOCNO_ASSIGNED_P (another_a)
	  || (hard_regno = ALLOCNO_HARD_REGNO (another_a)) < 0)
	continue;
      rclass = REGNO_REG_CLASS (hard_regno);
      i = ira_class_hard_reg_index[cover_class][hard_regno];
      ira_assert (i >= 0);
      /* Cost of the register-register move that sharing HARD_REGNO
	 would eliminate; direction depends on which side A is on.  */
      cost = (cp->first == a
	      ? ira_register_move_cost[mode][rclass][cover_class]
	      : ira_register_move_cost[mode][cover_class][rclass]);
      ira_allocate_and_set_or_copy_costs
	(&ALLOCNO_UPDATED_HARD_REG_COSTS (a),
	 cover_class, ALLOCNO_COVER_CLASS_COST (a),
	 ALLOCNO_HARD_REG_COSTS (a));
      ira_allocate_and_set_or_copy_costs
	(&ALLOCNO_UPDATED_CONFLICT_HARD_REG_COSTS (a),
	 cover_class, 0, ALLOCNO_CONFLICT_HARD_REG_COSTS (a));
      ALLOCNO_UPDATED_HARD_REG_COSTS (a)[i] -= cp->freq * cost;
      ALLOCNO_UPDATED_CONFLICT_HARD_REG_COSTS (a)[i] -= cp->freq * cost;
    }
}

/* Map: allocno number -> allocno priority.  */
static int *allocno_priorities;

/* Set up elements of ALLOCNO_PRIORITIES for the N allocnos in array
   CONSIDERATION_ALLOCNOS.  The priority grows with reference count
   and frequency and shrinks with live-range length.  */
static void
start_allocno_priorities (ira_allocno_t *consideration_allocnos, int n)
{
  int i, length;
  ira_allocno_t a;
  allocno_live_range_t r;

  for (i = 0; i < n; i++)
    {
      a = consideration_allocnos[i];
      /* Total number of program points the allocno lives at.  */
      for (length = 0, r = ALLOCNO_LIVE_RANGES (a); r != NULL; r = r->next)
	length += r->finish - r->start + 1;
      if (length == 0)
	{
	  allocno_priorities[ALLOCNO_NUM (a)] = 0;
	  continue;
	}
      ira_assert (length > 0 && ALLOCNO_NREFS (a) >= 0);
      allocno_priorities[ALLOCNO_NUM (a)]
	= (((double) (floor_log2 (ALLOCNO_NREFS (a)) * ALLOCNO_FREQ (a))
	    / length)
	   * (10000 / REG_FREQ_MAX) * PSEUDO_REGNO_SIZE (ALLOCNO_REGNO (a)));
    }
}

/* Sort allocnos according to their priorities which are calculated
   analogously to the ones in file `global.c'.  */
static int
allocno_priority_compare_func (const void *v1p, const void *v2p)
{
  ira_allocno_t a1 = *(const ira_allocno_t *) v1p;
  ira_allocno_t a2 = *(const ira_allocno_t *) v2p;
  int pri1, pri2;

  pri1 = allocno_priorities[ALLOCNO_NUM (a1)];
  pri2 = allocno_priorities[ALLOCNO_NUM (a2)];
  /* NOTE(review): `pri2 - pri1' can overflow int for priorities of
     opposite sign and large magnitude, making the comparison
     inconsistent — consider comparing with `<' instead; confirm the
     possible priority range.  */
  if (pri2 - pri1)
    return pri2 - pri1;

  /* If regs are equally good, sort by allocnos, so that the results of
     qsort leave nothing to chance.  */
  return ALLOCNO_NUM (a1) - ALLOCNO_NUM (a2);
}

/* Try to assign hard registers to the unassigned allocnos and
   allocnos conflicting with them or conflicting with allocnos whose
   regno >= START_REGNO.  The function is called after ira_flattening,
   so more allocnos (including ones created in ira-emit.c) will have a
   chance to get a hard register.  We use a simple assignment
   algorithm based on priorities.  */
void
ira_reassign_conflict_allocnos (int start_regno)
{
  int i, allocnos_to_color_num;
  ira_allocno_t a, conflict_a;
  ira_allocno_conflict_iterator aci;
  enum reg_class cover_class;
  bitmap allocnos_to_color;
  ira_allocno_iterator ai;

  allocnos_to_color = ira_allocate_bitmap ();
  allocnos_to_color_num = 0;
  /* Collect the candidates: every unassigned allocno, plus every
     allocno conflicting with one whose regno >= START_REGNO.  */
  FOR_EACH_ALLOCNO (a, ai)
    {
      if (! ALLOCNO_ASSIGNED_P (a)
	  && ! bitmap_bit_p (allocnos_to_color, ALLOCNO_NUM (a)))
	{
	  if (ALLOCNO_COVER_CLASS (a) != NO_REGS)
	    sorted_allocnos[allocnos_to_color_num++] = a;
	  else
	    {
	      /* NO_REGS allocnos can only go to memory.  */
	      ALLOCNO_ASSIGNED_P (a) = true;
	      ALLOCNO_HARD_REGNO (a) = -1;
	      ira_assert (ALLOCNO_UPDATED_HARD_REG_COSTS (a) == NULL);
	      ira_assert (ALLOCNO_UPDATED_CONFLICT_HARD_REG_COSTS (a) == NULL);
	    }
	  bitmap_set_bit (allocnos_to_color, ALLOCNO_NUM (a));
	}
      if (ALLOCNO_REGNO (a) < start_regno
	  || (cover_class = ALLOCNO_COVER_CLASS (a)) == NO_REGS)
	continue;
      FOR_EACH_ALLOCNO_CONFLICT (a, conflict_a, aci)
	{
	  ira_assert (cover_class == ALLOCNO_COVER_CLASS (conflict_a));
	  if (bitmap_bit_p (allocnos_to_color, ALLOCNO_NUM (conflict_a)))
	    continue;
	  bitmap_set_bit (allocnos_to_color, ALLOCNO_NUM (conflict_a));
	  sorted_allocnos[allocnos_to_color_num++] = conflict_a;
	}
    }
  ira_free_bitmap (allocnos_to_color);
  if (allocnos_to_color_num > 1)
    {
      start_allocno_priorities (sorted_allocnos, allocnos_to_color_num);
      qsort (sorted_allocnos, allocnos_to_color_num, sizeof (ira_allocno_t),
	     allocno_priority_compare_func);
    }
  /* First un-assign all candidates so their updated costs can be
     recomputed from already-assigned copy partners...  */
  for (i = 0; i < allocnos_to_color_num; i++)
    {
      a = sorted_allocnos[i];
      ALLOCNO_ASSIGNED_P (a) = false;
      ira_assert (ALLOCNO_UPDATED_HARD_REG_COSTS (a) == NULL);
      ira_assert (ALLOCNO_UPDATED_CONFLICT_HARD_REG_COSTS (a) == NULL);
      update_curr_costs (a);
    }
  /* ...then try to assign them in priority order.  */
  for (i = 0; i < allocnos_to_color_num; i++)
    {
      a = sorted_allocnos[i];
      if (assign_hard_reg (a, true))
	{
	  if (internal_flag_ira_verbose > 3 && ira_dump_file != NULL)
	    fprintf
	      (ira_dump_file,
	       " Secondary allocation: assign hard reg %d to reg %d\n",
	       ALLOCNO_HARD_REGNO (a), ALLOCNO_REGNO (a));
	}
    }
}



/* This page contains code to coalesce memory stack slots used by
   spilled allocnos.  This results in smaller stack frame, better data
   locality, and in smaller code for some architectures like
   x86/x86_64 where insn size depends on address displacement value.
   On the other hand, it can worsen insn scheduling after the RA but
   in practice it is less important than smaller stack frames.  */

/* Usage cost and order number of the coalesced allocno set to which
   a given pseudo register belongs.  */
static int *regno_coalesced_allocno_cost;
static int *regno_coalesced_allocno_num;

/* Sort pseudos according to frequencies of the coalesced allocno sets
   they belong to (putting the most frequently used ones first), and
   according to coalesced allocno set order numbers.  */
static int
coalesced_pseudo_reg_freq_compare (const void *v1p, const void *v2p)
{
  const int regno1 = *(const int *) v1p;
  const int regno2 = *(const int *) v2p;
  int diff;

  if ((diff = (regno_coalesced_allocno_cost[regno2]
	       - regno_coalesced_allocno_cost[regno1])) != 0)
    return diff;
  if ((diff = (regno_coalesced_allocno_num[regno1]
	       - regno_coalesced_allocno_num[regno2])) != 0)
    return diff;
  return regno1 - regno2;
}

/* Widest width in which each pseudo reg is referred to (via subreg).
   It is used for sorting pseudo registers.  */
static unsigned int *regno_max_ref_width;

/* Redefine STACK_GROWS_DOWNWARD in terms of 0 or 1.  */
#ifdef STACK_GROWS_DOWNWARD
# undef STACK_GROWS_DOWNWARD
# define STACK_GROWS_DOWNWARD 1
#else
# define STACK_GROWS_DOWNWARD 0
#endif

/* Sort pseudos according to their slot numbers (putting ones with
   smaller numbers first, or last when the frame pointer is not
   needed).  Pseudos without a slot (no allocno, or one that got a
   hard register) sort after all slotted ones, keeping their relative
   order.  */
static int
coalesced_pseudo_reg_slot_compare (const void *v1p, const void *v2p)
{
  const int regno1 = *(const int *) v1p;
  const int regno2 = *(const int *) v2p;
  ira_allocno_t a1 = ira_regno_allocno_map[regno1];
  ira_allocno_t a2 = ira_regno_allocno_map[regno2];
  int diff, slot_num1, slot_num2;
  int total_size1, total_size2;

  if (a1 == NULL || ALLOCNO_HARD_REGNO (a1) >= 0)
    {
      if (a2 == NULL || ALLOCNO_HARD_REGNO (a2) >= 0)
	return (const int *) v1p - (const int *) v2p; /* Save the order.  */
      return 1;
    }
  else if (a2 == NULL || ALLOCNO_HARD_REGNO (a2) >= 0)
    return -1;
  /* A negative ALLOCNO_HARD_REGNO encodes the stack slot number.  */
  slot_num1 = -ALLOCNO_HARD_REGNO (a1);
  slot_num2 = -ALLOCNO_HARD_REGNO (a2);
  if ((diff = slot_num1 - slot_num2) != 0)
    /* Parses as `(!FRAME_GROWS_DOWNWARD) == STACK_GROWS_DOWNWARD',
       i.e. the sort direction flips when frame and stack grow the
       same way and no frame pointer is needed.  */
    return (frame_pointer_needed
	    || !FRAME_GROWS_DOWNWARD == STACK_GROWS_DOWNWARD ? diff : -diff);
  total_size1 = MAX (PSEUDO_REGNO_BYTES (regno1), regno_max_ref_width[regno1]);
  total_size2 = MAX (PSEUDO_REGNO_BYTES (regno2), regno_max_ref_width[regno2]);
  if ((diff = total_size2 - total_size1) != 0)
    return diff;
  return (const int *) v1p - (const int *) v2p; /* Save the order.  */
}

/* Setup REGNO_COALESCED_ALLOCNO_COST and REGNO_COALESCED_ALLOCNO_NUM
   for coalesced allocno sets containing allocnos with their regnos
   given in array PSEUDO_REGNOS of length N.  */
static void
setup_coalesced_allocno_costs_and_nums (int *pseudo_regnos, int n)
{
  int i, num, regno, cost;
  ira_allocno_t allocno, a;

  for (num = i = 0; i < n; i++)
    {
      regno = pseudo_regnos[i];
      allocno = ira_regno_allocno_map[regno];
      if (allocno == NULL)
	{
	  regno_coalesced_allocno_cost[regno] = 0;
	  regno_coalesced_allocno_num[regno] = ++num;
	  continue;
	}
      /* Process each coalesced set once, through its first allocno.  */
      if (ALLOCNO_FIRST_COALESCED_ALLOCNO (allocno) != allocno)
	continue;
      num++;
      /* First pass over the set: accumulate the total frequency.  */
      for (cost = 0, a = ALLOCNO_NEXT_COALESCED_ALLOCNO (allocno);;
	   a = ALLOCNO_NEXT_COALESCED_ALLOCNO (a))
	{
	  cost += ALLOCNO_FREQ (a);
	  if (a == allocno)
	    break;
	}
      /* Second pass: record the set's number and cost for every
	 member's regno.  */
      for (a = ALLOCNO_NEXT_COALESCED_ALLOCNO (allocno);;
	   a = ALLOCNO_NEXT_COALESCED_ALLOCNO (a))
	{
	  regno_coalesced_allocno_num[ALLOCNO_REGNO (a)] = num;
	  regno_coalesced_allocno_cost[ALLOCNO_REGNO (a)] = cost;
	  if (a == allocno)
	    break;
	}
    }
}

/* Collect spilled allocnos representing coalesced allocno sets (the
   first coalesced allocno).  The collected allocnos are returned
   through array SPILLED_COALESCED_ALLOCNOS.  The function returns the
   number of the collected allocnos.  The allocnos are given by their
   regnos in array PSEUDO_REGNOS of length N.
 */
static int
collect_spilled_coalesced_allocnos (int *pseudo_regnos, int n,
				    ira_allocno_t *spilled_coalesced_allocnos)
{
  int i, num, regno;
  ira_allocno_t allocno;

  for (num = i = 0; i < n; i++)
    {
      regno = pseudo_regnos[i];
      allocno = ira_regno_allocno_map[regno];
      /* Skip pseudos without allocnos, pseudos that got a hard
	 register, and non-representative members of coalesced sets.  */
      if (allocno == NULL || ALLOCNO_HARD_REGNO (allocno) >= 0
	  || ALLOCNO_FIRST_COALESCED_ALLOCNO (allocno) != allocno)
	continue;
      spilled_coalesced_allocnos[num++] = allocno;
    }
  return num;
}

/* We have coalesced allocnos involved in copies.  Coalesce allocnos
   further in order to share the same memory stack slot.  Allocnos
   representing sets of allocnos coalesced before the call are given
   in array SPILLED_COALESCED_ALLOCNOS of length NUM.  Return TRUE if
   some allocnos were coalesced in the function.  */
static bool
coalesce_spill_slots (ira_allocno_t *spilled_coalesced_allocnos, int num)
{
  int i, j;
  ira_allocno_t allocno, a;
  bool merged_p = false;

  /* Coalesce non-conflicting spilled allocnos preferring most
     frequently used.  */
  for (i = 0; i < num; i++)
    {
      allocno = spilled_coalesced_allocnos[i];
      /* Allocnos with register equivalences must keep their own
	 slots, so they are never merged.  */
      if (ALLOCNO_FIRST_COALESCED_ALLOCNO (allocno) != allocno
	  || (ALLOCNO_REGNO (allocno) < ira_reg_equiv_len
	      && (ira_reg_equiv_invariant_p[ALLOCNO_REGNO (allocno)]
		  || ira_reg_equiv_const[ALLOCNO_REGNO (allocno)] != NULL_RTX)))
	continue;
      for (j = 0; j < i; j++)
	{
	  a = spilled_coalesced_allocnos[j];
	  if (ALLOCNO_FIRST_COALESCED_ALLOCNO (a) != a
	      || (ALLOCNO_REGNO (a) < ira_reg_equiv_len
		  && (ira_reg_equiv_invariant_p[ALLOCNO_REGNO (a)]
		      || ira_reg_equiv_const[ALLOCNO_REGNO (a)] != NULL_RTX))
	      || coalesced_allocno_conflict_p (allocno, a, true))
	    continue;
	  allocno_coalesced_p = true;
	  merged_p = true;
	  if (internal_flag_ira_verbose > 3 && ira_dump_file != NULL)
	    fprintf (ira_dump_file,
		     " Coalescing spilled allocnos a%dr%d->a%dr%d\n",
		     ALLOCNO_NUM (allocno), ALLOCNO_REGNO (allocno),
		     ALLOCNO_NUM (a), ALLOCNO_REGNO (a));
	  merge_allocnos (a, allocno);
	  ira_assert (ALLOCNO_FIRST_COALESCED_ALLOCNO (a) == a);
	}
    }
  return merged_p;
}

/* Sort pseudo-register numbers in array PSEUDO_REGNOS of length N for
   subsequent assigning stack slots to them in the reload pass.  To do
   this we coalesce spilled allocnos first to decrease the number of
   memory-memory move insns.  This function is called by the
   reload.  */
void
ira_sort_regnos_for_alter_reg (int *pseudo_regnos, int n,
			       unsigned int *reg_max_ref_width)
{
  int max_regno = max_reg_num ();
  int i, regno, num, slot_num;
  ira_allocno_t allocno, a;
  ira_allocno_iterator ai;
  ira_allocno_t *spilled_coalesced_allocnos;

  processed_coalesced_allocno_bitmap = ira_allocate_bitmap ();
  /* Set up allocnos that can be coalesced.  */
  coloring_allocno_bitmap = ira_allocate_bitmap ();
  for (i = 0; i < n; i++)
    {
      regno = pseudo_regnos[i];
      allocno = ira_regno_allocno_map[regno];
      if (allocno != NULL)
	bitmap_set_bit (coloring_allocno_bitmap,
			ALLOCNO_NUM (allocno));
    }
  allocno_coalesced_p = false;
  coalesce_allocnos (true);
  ira_free_bitmap (coloring_allocno_bitmap);
  regno_coalesced_allocno_cost
    = (int *) ira_allocate (max_regno * sizeof (int));
  regno_coalesced_allocno_num
    = (int *) ira_allocate (max_regno * sizeof (int));
  memset (regno_coalesced_allocno_num, 0, max_regno * sizeof (int));
  setup_coalesced_allocno_costs_and_nums (pseudo_regnos, n);
  /* Sort regnos according to frequencies of the corresponding
     coalesced allocno sets.  */
  qsort (pseudo_regnos, n, sizeof (int), coalesced_pseudo_reg_freq_compare);
  spilled_coalesced_allocnos
    = (ira_allocno_t *) ira_allocate (ira_allocnos_num
				      * sizeof (ira_allocno_t));
  /* Collect allocnos representing the spilled coalesced allocno
     sets.  */
  num = collect_spilled_coalesced_allocnos (pseudo_regnos, n,
					    spilled_coalesced_allocnos);
  if (flag_ira_share_spill_slots
      && coalesce_spill_slots (spilled_coalesced_allocnos, num))
    {
      /* Coalescing changed set membership: recompute costs/numbers,
	 re-sort, and re-collect the representatives.  */
      setup_coalesced_allocno_costs_and_nums (pseudo_regnos, n);
      qsort (pseudo_regnos, n, sizeof (int),
	     coalesced_pseudo_reg_freq_compare);
      num = collect_spilled_coalesced_allocnos (pseudo_regnos, n,
						spilled_coalesced_allocnos);
    }
  ira_free_bitmap (processed_coalesced_allocno_bitmap);
  allocno_coalesced_p = false;
  /* Assign stack slot numbers to spilled allocno sets, use smaller
     numbers for most frequently used coalesced allocnos.  -1 is
     reserved for dynamic search of stack slots for pseudos spilled by
     the reload.  */
  slot_num = 1;
  for (i = 0; i < num; i++)
    {
      allocno = spilled_coalesced_allocnos[i];
      if (ALLOCNO_FIRST_COALESCED_ALLOCNO (allocno) != allocno
	  || ALLOCNO_HARD_REGNO (allocno) >= 0
	  || (ALLOCNO_REGNO (allocno) < ira_reg_equiv_len
	      && (ira_reg_equiv_invariant_p[ALLOCNO_REGNO (allocno)]
		  || ira_reg_equiv_const[ALLOCNO_REGNO (allocno)] != NULL_RTX)))
	continue;
      if (internal_flag_ira_verbose > 3 && ira_dump_file != NULL)
	fprintf (ira_dump_file, " Slot %d (freq,size):", slot_num);
      slot_num++;
      /* Record the slot as a negative ALLOCNO_HARD_REGNO for every
	 member of the coalesced set.  */
      for (a = ALLOCNO_NEXT_COALESCED_ALLOCNO (allocno);;
	   a = ALLOCNO_NEXT_COALESCED_ALLOCNO (a))
	{
	  ira_assert (ALLOCNO_HARD_REGNO (a) < 0);
	  ALLOCNO_HARD_REGNO (a) = -slot_num;
	  if (internal_flag_ira_verbose > 3 && ira_dump_file != NULL)
	    fprintf (ira_dump_file, " a%dr%d(%d,%d)",
		     ALLOCNO_NUM (a), ALLOCNO_REGNO (a), ALLOCNO_FREQ (a),
		     MAX (PSEUDO_REGNO_BYTES (ALLOCNO_REGNO (a)),
			  reg_max_ref_width[ALLOCNO_REGNO (a)]));

	  if (a == allocno)
	    break;
	}
      if (internal_flag_ira_verbose > 3 && ira_dump_file != NULL)
	fprintf (ira_dump_file, "\n");
    }
  ira_spilled_reg_stack_slots_num = slot_num - 1;
  ira_free (spilled_coalesced_allocnos);
  /* Sort regnos according to the slot numbers.  */
  regno_max_ref_width = reg_max_ref_width;
  qsort (pseudo_regnos, n, sizeof (int), coalesced_pseudo_reg_slot_compare);
  /* Uncoalesce allocnos, which is necessary for (re)assigning during
     the reload pass.  */
  FOR_EACH_ALLOCNO (a, ai)
    {
      ALLOCNO_FIRST_COALESCED_ALLOCNO (a) = a;
      ALLOCNO_NEXT_COALESCED_ALLOCNO (a) = a;
    }
  ira_free (regno_coalesced_allocno_num);
  ira_free (regno_coalesced_allocno_cost);
}



/* This page contains code used by the reload pass to improve the
   final code.  */

/* The function is called from reload to mark changes in the
   allocation of REGNO made by the reload.  Remember that reg_renumber
   reflects the change result.
 */
void
ira_mark_allocation_change (int regno)
{
  /* NOTE(review): A is dereferenced by ALLOCNO_COVER_CLASS in the
     initializer below, before the `ira_assert (a != NULL)' runs —
     the assert cannot catch a NULL map entry here.  */
  ira_allocno_t a = ira_regno_allocno_map[regno];
  int old_hard_regno, hard_regno, cost;
  enum reg_class cover_class = ALLOCNO_COVER_CLASS (a);

  ira_assert (a != NULL);
  hard_regno = reg_renumber[regno];
  if ((old_hard_regno = ALLOCNO_HARD_REGNO (a)) == hard_regno)
    return;
  /* Remove the cost contribution of the old allocation...  */
  if (old_hard_regno < 0)
    cost = -ALLOCNO_MEMORY_COST (a);
  else
    {
      ira_assert (ira_class_hard_reg_index[cover_class][old_hard_regno] >= 0);
      cost = -(ALLOCNO_HARD_REG_COSTS (a) == NULL
	       ? ALLOCNO_COVER_CLASS_COST (a)
	       : ALLOCNO_HARD_REG_COSTS (a)
	         [ira_class_hard_reg_index[cover_class][old_hard_regno]]);
      update_copy_costs (a, false);
    }
  ira_overall_cost -= cost;
  ALLOCNO_HARD_REGNO (a) = hard_regno;
  /* ...and add the cost contribution of the new one.  */
  if (hard_regno < 0)
    {
      ALLOCNO_HARD_REGNO (a) = -1;
      cost += ALLOCNO_MEMORY_COST (a);
    }
  else if (ira_class_hard_reg_index[cover_class][hard_regno] >= 0)
    {
      cost += (ALLOCNO_HARD_REG_COSTS (a) == NULL
	       ? ALLOCNO_COVER_CLASS_COST (a)
	       : ALLOCNO_HARD_REG_COSTS (a)
	         [ira_class_hard_reg_index[cover_class][hard_regno]]);
      update_copy_costs (a, true);
    }
  else
    /* Reload changed class of the allocno.  */
    cost = 0;
  ira_overall_cost += cost;
}

/* This function is called when reload deletes a memory-memory move.
   In this case we mark that the allocation of the corresponding
   allocnos should not be changed in the future.  Otherwise we risk
   generating wrong code.  */
void
ira_mark_memory_move_deletion (int dst_regno, int src_regno)
{
  ira_allocno_t dst = ira_regno_allocno_map[dst_regno];
  ira_allocno_t src = ira_regno_allocno_map[src_regno];

  ira_assert (dst != NULL && src != NULL
	      && ALLOCNO_HARD_REGNO (dst) < 0
	      && ALLOCNO_HARD_REGNO (src) < 0);
  ALLOCNO_DONT_REASSIGN_P (dst) = true;
  ALLOCNO_DONT_REASSIGN_P (src) = true;
}

/* Try to assign a hard register (except for FORBIDDEN_REGS) to
   allocno A and return TRUE in the case of success.  That is an
   analog of retry_global_alloc for IRA.
 */
static bool
allocno_reload_assign (ira_allocno_t a, HARD_REG_SET forbidden_regs)
{
  int hard_regno;
  enum reg_class cover_class;
  int regno = ALLOCNO_REGNO (a);

  IOR_HARD_REG_SET (ALLOCNO_TOTAL_CONFLICT_HARD_REGS (a), forbidden_regs);
  /* Without caller-saves, an allocno crossing calls cannot use
     call-clobbered registers.  */
  if (! flag_caller_saves && ALLOCNO_CALLS_CROSSED_NUM (a) != 0)
    IOR_HARD_REG_SET (ALLOCNO_TOTAL_CONFLICT_HARD_REGS (a), call_used_reg_set);
  ALLOCNO_ASSIGNED_P (a) = false;
  ira_assert (ALLOCNO_UPDATED_HARD_REG_COSTS (a) == NULL);
  ira_assert (ALLOCNO_UPDATED_CONFLICT_HARD_REG_COSTS (a) == NULL);
  cover_class = ALLOCNO_COVER_CLASS (a);
  update_curr_costs (a);
  assign_hard_reg (a, true);
  hard_regno = ALLOCNO_HARD_REGNO (a);
  reg_renumber[regno] = hard_regno;
  if (hard_regno < 0)
    ALLOCNO_HARD_REGNO (a) = -1;
  else
    {
      ira_assert (ira_class_hard_reg_index[cover_class][hard_regno] >= 0);
      ira_overall_cost -= (ALLOCNO_MEMORY_COST (a)
			   - (ALLOCNO_HARD_REG_COSTS (a) == NULL
			      ? ALLOCNO_COVER_CLASS_COST (a)
			      : ALLOCNO_HARD_REG_COSTS (a)
			        [ira_class_hard_reg_index
				 [cover_class][hard_regno]]));
      if (ALLOCNO_CALLS_CROSSED_NUM (a) != 0
	  && ! ira_hard_reg_not_in_set_p (hard_regno, ALLOCNO_MODE (a),
					  call_used_reg_set))
	{
	  ira_assert (flag_caller_saves);
	  caller_save_needed = 1;
	}
    }

  /* If we found a hard register, modify the RTL for the pseudo
     register to show the hard register, and mark the pseudo register
     live.  */
  if (reg_renumber[regno] >= 0)
    {
      if (internal_flag_ira_verbose > 3 && ira_dump_file != NULL)
	fprintf (ira_dump_file, ": reassign to %d\n", reg_renumber[regno]);
      SET_REGNO (regno_reg_rtx[regno], reg_renumber[regno]);
      mark_home_live (regno);
    }
  else if (internal_flag_ira_verbose > 3 && ira_dump_file != NULL)
    fprintf (ira_dump_file, "\n");

  return reg_renumber[regno] >= 0;
}

/* Sort pseudos according to their usage frequencies (putting the most
   frequently used ones first).
 */
static int
pseudo_reg_compare (const void *v1p, const void *v2p)
{
  int regno1 = *(const int *) v1p;
  int regno2 = *(const int *) v2p;
  int diff;

  if ((diff = REG_FREQ (regno2) - REG_FREQ (regno1)) != 0)
    return diff;
  /* Tie-break on regno for a deterministic qsort result.  */
  return regno1 - regno2;
}

/* Try to allocate hard registers to SPILLED_PSEUDO_REGS (there are
   NUM of them) or spilled pseudos conflicting with pseudos in
   SPILLED_PSEUDO_REGS.  Return TRUE and update SPILLED, if the
   allocation has been changed.  The function doesn't use
   BAD_SPILL_REGS and hard registers in PSEUDO_FORBIDDEN_REGS and
   PSEUDO_PREVIOUS_REGS for the corresponding pseudos.  The function
   is called by the reload pass at the end of each reload
   iteration.  */
bool
ira_reassign_pseudos (int *spilled_pseudo_regs, int num,
		      HARD_REG_SET bad_spill_regs,
		      HARD_REG_SET *pseudo_forbidden_regs,
		      HARD_REG_SET *pseudo_previous_regs, bitmap spilled)
{
  int i, m, n, regno;
  bool changed_p;
  ira_allocno_t a, conflict_a;
  HARD_REG_SET forbidden_regs;
  ira_allocno_conflict_iterator aci;

  if (num > 1)
    qsort (spilled_pseudo_regs, num, sizeof (int), pseudo_reg_compare);
  changed_p = false;
  /* Try to assign hard registers to pseudos from
     SPILLED_PSEUDO_REGS.  */
  for (m = i = 0; i < num; i++)
    {
      regno = spilled_pseudo_regs[i];
      COPY_HARD_REG_SET (forbidden_regs, bad_spill_regs);
      IOR_HARD_REG_SET (forbidden_regs, pseudo_forbidden_regs[regno]);
      IOR_HARD_REG_SET (forbidden_regs, pseudo_previous_regs[regno]);
      gcc_assert (reg_renumber[regno] < 0);
      a = ira_regno_allocno_map[regno];
      ira_mark_allocation_change (regno);
      ira_assert (reg_renumber[regno] < 0);
      if (internal_flag_ira_verbose > 3 && ira_dump_file != NULL)
	fprintf (ira_dump_file,
		 " Spill %d(a%d), cost=%d", regno, ALLOCNO_NUM (a),
		 ALLOCNO_MEMORY_COST (a)
		 - ALLOCNO_COVER_CLASS_COST (a));
      allocno_reload_assign (a, forbidden_regs);
      if (reg_renumber[regno] >= 0)
	{
	  CLEAR_REGNO_REG_SET (spilled, regno);
	  changed_p = true;
	}
      else
	/* Keep the still-spilled pseudos compacted at the front.  */
	spilled_pseudo_regs[m++] = regno;
    }
  if (m == 0)
    return changed_p;
  if (internal_flag_ira_verbose > 3 && ira_dump_file != NULL)
    {
      fprintf (ira_dump_file, " Spilled regs");
      for (i = 0; i < m; i++)
	fprintf (ira_dump_file, " %d", spilled_pseudo_regs[i]);
      fprintf (ira_dump_file, "\n");
    }
  /* Try to assign hard registers to pseudos conflicting with ones
     from SPILLED_PSEUDO_REGS.  */
  for (i = n = 0; i < m; i++)
    {
      regno = spilled_pseudo_regs[i];
      a = ira_regno_allocno_map[regno];
      FOR_EACH_ALLOCNO_CONFLICT (a, conflict_a, aci)
	if (ALLOCNO_HARD_REGNO (conflict_a) < 0
	    && ! ALLOCNO_DONT_REASSIGN_P (conflict_a)
	    && ! bitmap_bit_p (consideration_allocno_bitmap,
			       ALLOCNO_NUM (conflict_a)))
	  {
	    sorted_allocnos[n++] = conflict_a;
	    bitmap_set_bit (consideration_allocno_bitmap,
			    ALLOCNO_NUM (conflict_a));
	  }
    }
  if (n != 0)
    {
      start_allocno_priorities (sorted_allocnos, n);
      qsort (sorted_allocnos, n, sizeof (ira_allocno_t),
	     allocno_priority_compare_func);
      for (i = 0; i < n; i++)
	{
	  a = sorted_allocnos[i];
	  regno = ALLOCNO_REGNO (a);
	  COPY_HARD_REG_SET (forbidden_regs, bad_spill_regs);
	  IOR_HARD_REG_SET (forbidden_regs, pseudo_forbidden_regs[regno]);
	  IOR_HARD_REG_SET (forbidden_regs, pseudo_previous_regs[regno]);
	  if (internal_flag_ira_verbose > 3 && ira_dump_file != NULL)
	    fprintf (ira_dump_file,
		     " Try assign %d(a%d), cost=%d",
		     regno, ALLOCNO_NUM (a),
		     ALLOCNO_MEMORY_COST (a)
		     - ALLOCNO_COVER_CLASS_COST (a));
	  if (allocno_reload_assign (a, forbidden_regs))
	    {
	      changed_p = true;
	      bitmap_clear_bit (spilled, regno);
	    }
	}
    }
  return changed_p;
}

/* The function is called by reload and returns already allocated
   stack slot (if any) for REGNO with given INHERENT_SIZE and
   TOTAL_SIZE.  In the case of failure to find a slot which can be
   used for REGNO, the function returns NULL.  */
rtx
ira_reuse_stack_slot (int regno, unsigned int inherent_size,
		      unsigned int total_size)
{
  unsigned int i;
  int slot_num, best_slot_num;
  int cost, best_cost;
  ira_copy_t cp, next_cp;
  ira_allocno_t another_allocno, allocno = ira_regno_allocno_map[regno];
  rtx x;
  bitmap_iterator bi;
  struct ira_spilled_reg_stack_slot *slot = NULL;

  ira_assert (flag_ira && inherent_size == PSEUDO_REGNO_BYTES (regno)
	      && inherent_size <= total_size
	      && ALLOCNO_HARD_REGNO (allocno) < 0);
  if (! flag_ira_share_spill_slots)
    return NULL_RTX;
  /* ALLOCNO_HARD_REGNO of -slot_num - 2 encodes an already assigned
     slot; -1 decodes to "no slot yet".  */
  slot_num = -ALLOCNO_HARD_REGNO (allocno) - 2;
  if (slot_num != -1)
    {
      slot = &ira_spilled_reg_stack_slots[slot_num];
      x = slot->mem;
    }
  else
    {
      best_cost = best_slot_num = -1;
      x = NULL_RTX;
      /* It means that the pseudo was spilled in the reload pass, try
	 to reuse a slot.  */
      for (slot_num = 0;
	   slot_num < ira_spilled_reg_stack_slots_num;
	   slot_num++)
	{
	  slot = &ira_spilled_reg_stack_slots[slot_num];
	  if (slot->mem == NULL_RTX)
	    continue;
	  if (slot->width < total_size
	      || GET_MODE_SIZE (GET_MODE (slot->mem)) < inherent_size)
	    continue;

	  /* The slot is usable only if REGNO's live range does not
	     intersect any pseudo already in the slot.  */
	  EXECUTE_IF_SET_IN_BITMAP (&slot->spilled_regs,
				    FIRST_PSEUDO_REGISTER, i, bi)
	    {
	      another_allocno = ira_regno_allocno_map[i];
	      if (ira_allocno_live_ranges_intersect_p (allocno,
						       another_allocno))
		goto cont;
	    }
	  /* Prefer the slot whose occupants are connected to REGNO by
	     the most frequent copies (sharing removes more moves).  */
	  for (cost = 0, cp = ALLOCNO_COPIES (allocno);
	       cp != NULL;
	       cp = next_cp)
	    {
	      if (cp->first == allocno)
		{
		  next_cp = cp->next_first_allocno_copy;
		  another_allocno = cp->second;
		}
	      else if (cp->second == allocno)
		{
		  next_cp = cp->next_second_allocno_copy;
		  another_allocno = cp->first;
		}
	      else
		gcc_unreachable ();
	      if (cp->insn == NULL_RTX)
		continue;
	      if (bitmap_bit_p (&slot->spilled_regs,
				ALLOCNO_REGNO (another_allocno)))
		cost += cp->freq;
	    }
	  if (cost > best_cost)
	    {
	      best_cost = cost;
	      best_slot_num = slot_num;
	    }
	cont:
	  ;
	}
      if (best_cost >= 0)
	{
	  slot = &ira_spilled_reg_stack_slots[best_slot_num];
	  SET_REGNO_REG_SET (&slot->spilled_regs, regno);
	  x = slot->mem;
	  ALLOCNO_HARD_REGNO (allocno) = -best_slot_num - 2;
	}
    }
  if (x != NULL_RTX)
    {
      ira_assert (slot->width >= total_size);
      EXECUTE_IF_SET_IN_BITMAP (&slot->spilled_regs,
				FIRST_PSEUDO_REGISTER, i, bi)
	{
	  ira_assert (! ira_pseudo_live_ranges_intersect_p (regno, i));
	}
      SET_REGNO_REG_SET (&slot->spilled_regs, regno);
      if (internal_flag_ira_verbose > 3 && ira_dump_file)
	{
	  fprintf (ira_dump_file, " Assigning %d(freq=%d) slot %d of",
		   regno, REG_FREQ (regno), slot_num);
	  EXECUTE_IF_SET_IN_BITMAP (&slot->spilled_regs,
				    FIRST_PSEUDO_REGISTER, i, bi)
	    {
	      if ((unsigned) regno != i)
		fprintf (ira_dump_file, " %d", i);
	    }
	  fprintf (ira_dump_file, "\n");
	}
    }
  return x;
}

/* This is called by reload every time a new stack slot X with
   TOTAL_SIZE was allocated for REGNO.  We store this info for
   subsequent ira_reuse_stack_slot calls.  */
void
ira_mark_new_stack_slot (rtx x, int regno, unsigned int total_size)
{
  struct ira_spilled_reg_stack_slot *slot;
  int slot_num;
  ira_allocno_t allocno;

  ira_assert (flag_ira && PSEUDO_REGNO_BYTES (regno) <= total_size);
  allocno = ira_regno_allocno_map[regno];
  slot_num = -ALLOCNO_HARD_REGNO (allocno) - 2;
  if (slot_num == -1)
    {
      /* No slot recorded yet: register a brand-new one.  */
      slot_num = ira_spilled_reg_stack_slots_num++;
      ALLOCNO_HARD_REGNO (allocno) = -slot_num - 2;
    }
  slot = &ira_spilled_reg_stack_slots[slot_num];
  INIT_REG_SET (&slot->spilled_regs);
  SET_REGNO_REG_SET (&slot->spilled_regs, regno);
  slot->mem = x;
  slot->width = total_size;
  if (internal_flag_ira_verbose > 3 && ira_dump_file)
    fprintf (ira_dump_file, " Assigning %d(freq=%d) a new slot %d\n",
	     regno, REG_FREQ (regno), slot_num);
}


/* Return spill cost for pseudo-registers whose numbers are in array
   REGNOS (with a negative number as an end marker) for reload with
   given IN and OUT for INSN.  Return also number points (through
   EXCESS_PRESSURE_LIVE_LENGTH) where the pseudo-register lives and
   the register pressure is high, number of references of the
   pseudo-registers (through NREFS), number of callee-clobbered
   hard-registers occupied by the pseudo-registers (through
   CALL_USED_COUNT), and the first hard regno occupied by the
   pseudo-registers (through FIRST_HARD_REGNO).
 */
static int
calculate_spill_cost (int *regnos, rtx in, rtx out, rtx insn,
		      int *excess_pressure_live_length,
		      int *nrefs, int *call_used_count, int *first_hard_regno)
{
  int i, cost, regno, hard_regno, j, count, saved_cost, nregs;
  bool in_p, out_p;
  int length;
  ira_allocno_t a;

  *nrefs = 0;
  for (length = count = cost = i = 0;; i++)
    {
      regno = regnos[i];
      if (regno < 0)
	break;
      *nrefs += REG_N_REFS (regno);
      hard_regno = reg_renumber[regno];
      ira_assert (hard_regno >= 0);
      a = ira_regno_allocno_map[regno];
      length += ALLOCNO_EXCESS_PRESSURE_POINTS_NUM (a);
      cost += ALLOCNO_MEMORY_COST (a) - ALLOCNO_COVER_CLASS_COST (a);
      nregs = hard_regno_nregs[hard_regno][ALLOCNO_MODE (a)];
      /* Count the pseudo as occupying a callee-clobbered register
	 only if every hard register it spans is call-used.  */
      for (j = 0; j < nregs; j++)
	if (! TEST_HARD_REG_BIT (call_used_reg_set, hard_regno + j))
	  break;
      if (j == nregs)
	count++;
      in_p = in && REG_P (in) && (int) REGNO (in) == hard_regno;
      out_p = out && REG_P (out) && (int) REGNO (out) == hard_regno;
      /* If the register dies in INSN, spilling it saves the memory
	 moves the reload would otherwise insert around INSN.  */
      if ((in_p || out_p)
	  && find_regno_note (insn, REG_DEAD, hard_regno) != NULL_RTX)
	{
	  saved_cost = 0;
	  if (in_p)
	    saved_cost += ira_memory_move_cost
	                  [ALLOCNO_MODE (a)][ALLOCNO_COVER_CLASS (a)][1];
	  if (out_p)
	    saved_cost
	      += ira_memory_move_cost
	         [ALLOCNO_MODE (a)][ALLOCNO_COVER_CLASS (a)][0];
	  cost -= REG_FREQ_FROM_BB (BLOCK_FOR_INSN (insn)) * saved_cost;
	}
    }
  *excess_pressure_live_length = length;
  *call_used_count = count;
  hard_regno = -1;
  if (regnos[0] >= 0)
    {
      hard_regno = reg_renumber[regnos[0]];
    }
  *first_hard_regno = hard_regno;
  return cost;
}

/* Return TRUE if spilling pseudo-registers whose numbers are in array
   REGNOS is better than spilling pseudo-registers with numbers in
   OTHER_REGNOS for reload with given IN and OUT for INSN.  The
   function used by the reload pass to make better register spilling
   decisions.  */
bool
ira_better_spill_reload_regno_p (int *regnos, int *other_regnos,
				 rtx in, rtx out, rtx insn)
{
  int cost, other_cost;
  int length, other_length;
  int nrefs, other_nrefs;
  int call_used_count, other_call_used_count;
  int hard_regno, other_hard_regno;

  cost = calculate_spill_cost (regnos, in, out, insn,
			       &length, &nrefs, &call_used_count, &hard_regno);
  other_cost = calculate_spill_cost (other_regnos, in, out, insn,
				     &other_length, &other_nrefs,
				     &other_call_used_count,
				     &other_hard_regno);
  /* Criteria, in decreasing significance: unreferenced pseudos first,
     then lower spill cost, then longer high-pressure live length,
     then target allocation order (or fewer call-used regs).  */
  if (nrefs == 0 && other_nrefs != 0)
    return true;
  if (nrefs != 0 && other_nrefs == 0)
    return false;
  if (cost != other_cost)
    return cost < other_cost;
  if (length != other_length)
    return length > other_length;
#ifdef REG_ALLOC_ORDER
  if (hard_regno >= 0 && other_hard_regno >= 0)
    return (inv_reg_alloc_order[hard_regno]
	    < inv_reg_alloc_order[other_hard_regno]);
#else
  if (call_used_count != other_call_used_count)
    return call_used_count > other_call_used_count;
#endif
  return false;
}



/* Allocate and initialize data necessary for assign_hard_reg.  */
void
ira_initiate_assign (void)
{
  sorted_allocnos
    = (ira_allocno_t *) ira_allocate (sizeof (ira_allocno_t)
				      * ira_allocnos_num);
  consideration_allocno_bitmap = ira_allocate_bitmap ();
  initiate_cost_update ();
  allocno_priorities = (int *) ira_allocate (sizeof (int) * ira_allocnos_num);
}

/* Deallocate data used by assign_hard_reg.  */
void
ira_finish_assign (void)
{
  ira_free (sorted_allocnos);
  ira_free_bitmap (consideration_allocno_bitmap);
  finish_cost_update ();
  ira_free (allocno_priorities);
}



/* Entry function doing color-based register allocation.
 */
void
ira_color (void)
{
  allocno_stack_vec = VEC_alloc (ira_allocno_t, heap, ira_allocnos_num);
  removed_splay_allocno_vec
    = VEC_alloc (ira_allocno_t, heap, ira_allocnos_num);
  memset (allocated_hardreg_p, 0, sizeof (allocated_hardreg_p));
  ira_initiate_assign ();
  do_coloring ();
  ira_finish_assign ();
  VEC_free (ira_allocno_t, heap, removed_splay_allocno_vec);
  VEC_free (ira_allocno_t, heap, allocno_stack_vec);
  move_spill_restore ();
}



/* This page contains a simple register allocator without usage of
   allocno conflicts.  This is used for fast allocation for -O0.  */

/* Do register allocation by not using allocno conflicts.  It uses
   only allocno live ranges.  The algorithm is close to Chow's
   priority coloring.  */
void
ira_fast_allocation (void)
{
  int i, j, k, l, num, class_size, hard_regno;
#ifdef STACK_REGS
  bool no_stack_reg_p;
#endif
  enum reg_class cover_class;
  enum machine_mode mode;
  ira_allocno_t a;
  ira_allocno_iterator ai;
  allocno_live_range_t r;
  HARD_REG_SET conflict_hard_regs, *used_hard_regs;

  allocno_priorities = (int *) ira_allocate (sizeof (int) * ira_allocnos_num);
  FOR_EACH_ALLOCNO (a, ai)
    {
      l = ALLOCNO_EXCESS_PRESSURE_POINTS_NUM (a);
      if (l <= 0)
	l = 1;
      allocno_priorities[ALLOCNO_NUM (a)]
	= (((double) (floor_log2 (ALLOCNO_NREFS (a))
		      * (ALLOCNO_MEMORY_COST (a)
			 - ALLOCNO_COVER_CLASS_COST (a))) / l)
	   * (10000 / REG_FREQ_MAX)
	   * ira_reg_class_nregs[ALLOCNO_COVER_CLASS (a)][ALLOCNO_MODE (a)]);
    }
  /* One hard-reg set per program point, recording the hard registers
     already occupied at that point.  */
  used_hard_regs = (HARD_REG_SET *) ira_allocate (sizeof (HARD_REG_SET)
						  * ira_max_point);
  for (i = 0; i < ira_max_point; i++)
    CLEAR_HARD_REG_SET (used_hard_regs[i]);
  sorted_allocnos = (ira_allocno_t *) ira_allocate (sizeof (ira_allocno_t)
						    * ira_allocnos_num);
  num = 0;
  FOR_EACH_ALLOCNO (a, ai)
    sorted_allocnos[num++] = a;
  /* NUM == ira_allocnos_num here: the loop above visits every
     allocno.  */
  qsort (sorted_allocnos, ira_allocnos_num, sizeof (ira_allocno_t),
	 allocno_priority_compare_func);
  for (i = 0; i < num; i++)
    {
      a = sorted_allocnos[i];
      /* Conflicts are derived purely from live-range overlap with
	 already-allocated hard registers.  */
      COPY_HARD_REG_SET (conflict_hard_regs, ALLOCNO_CONFLICT_HARD_REGS (a));
      for (r = ALLOCNO_LIVE_RANGES (a); r != NULL; r = r->next)
	for (j = r->start; j <= r->finish; j++)
	  IOR_HARD_REG_SET (conflict_hard_regs, used_hard_regs[j]);
      cover_class = ALLOCNO_COVER_CLASS (a);
      ALLOCNO_ASSIGNED_P (a) = true;
      ALLOCNO_HARD_REGNO (a) = -1;
      if (hard_reg_set_subset_p (reg_class_contents[cover_class],
				 conflict_hard_regs))
	continue;
      mode = ALLOCNO_MODE (a);
#ifdef STACK_REGS
      no_stack_reg_p = ALLOCNO_NO_STACK_REG_P (a);
#endif
      class_size = ira_class_hard_regs_num[cover_class];
      for (j = 0; j < class_size; j++)
	{
	  hard_regno = ira_class_hard_regs[cover_class][j];
#ifdef STACK_REGS
	  if (no_stack_reg_p && FIRST_STACK_REG <= hard_regno
	      && hard_regno <= LAST_STACK_REG)
	    continue;
#endif
	  if (!ira_hard_reg_not_in_set_p (hard_regno, mode, conflict_hard_regs)
	      || (TEST_HARD_REG_BIT
		  (prohibited_class_mode_regs[cover_class][mode], hard_regno)))
	    continue;
	  ALLOCNO_HARD_REGNO (a) = hard_regno;
	  /* Mark the chosen register occupied over the allocno's
	     whole live range.  */
	  for (r = ALLOCNO_LIVE_RANGES (a); r != NULL; r = r->next)
	    for (k = r->start; k <= r->finish; k++)
	      IOR_HARD_REG_SET (used_hard_regs[k],
				ira_reg_mode_hard_regset[hard_regno][mode]);
	  break;
	}
    }
  ira_free (sorted_allocnos);
  ira_free (used_hard_regs);
  ira_free (allocno_priorities);
  if (internal_flag_ira_verbose > 1 && ira_dump_file != NULL)
    ira_print_disposition (ira_dump_file);
}
diff --git a/gcc/ira-conflicts.c b/gcc/ira-conflicts.c
new file mode 100644
index 00000000000..04d3e42d64d
--- /dev/null
+++ b/gcc/ira-conflicts.c
@@ -0,0 +1,777 @@
+/* IRA conflict builder.
+   Copyright (C) 2006, 2007, 2008
+   Free Software Foundation, Inc.
+   Contributed by Vladimir Makarov <vmakarov@redhat.com>.
+
+This file is part of GCC.
+ +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "regs.h" +#include "rtl.h" +#include "tm_p.h" +#include "target.h" +#include "flags.h" +#include "hard-reg-set.h" +#include "basic-block.h" +#include "insn-config.h" +#include "recog.h" +#include "toplev.h" +#include "params.h" +#include "df.h" +#include "sparseset.h" +#include "ira-int.h" + +/* This file contains code responsible for allocno conflict creation, + allocno copy creation and allocno info accumulation on upper level + regions. */ + +/* ira_allocnos_num array of arrays of bits, recording whether two + allocno's conflict (can't go in the same hardware register). + + Some arrays will be used as conflict bit vector of the + corresponding allocnos see function build_allocno_conflicts. */ +static IRA_INT_TYPE **conflicts; + +/* Macro to test a conflict of A1 and A2 in `conflicts'. */ +#define CONFLICT_ALLOCNO_P(A1, A2) \ + (ALLOCNO_MIN (A1) <= ALLOCNO_CONFLICT_ID (A2) \ + && ALLOCNO_CONFLICT_ID (A2) <= ALLOCNO_MAX (A1) \ + && TEST_ALLOCNO_SET_BIT (conflicts[ALLOCNO_NUM (A1)], \ + ALLOCNO_CONFLICT_ID (A2), \ + ALLOCNO_MIN (A1), \ + ALLOCNO_MAX (A1))) + + + +/* Build allocno conflict table by processing allocno live ranges. 
*/ +static void +build_conflict_bit_table (void) +{ + int i, num, id, allocated_words_num, conflict_bit_vec_words_num; + unsigned int j; + enum reg_class cover_class; + ira_allocno_t allocno, live_a; + allocno_live_range_t r; + ira_allocno_iterator ai; + sparseset allocnos_live; + int allocno_set_words; + + allocno_set_words = (ira_allocnos_num + IRA_INT_BITS - 1) / IRA_INT_BITS; + allocnos_live = sparseset_alloc (ira_allocnos_num); + conflicts = (IRA_INT_TYPE **) ira_allocate (sizeof (IRA_INT_TYPE *) + * ira_allocnos_num); + allocated_words_num = 0; + FOR_EACH_ALLOCNO (allocno, ai) + { + num = ALLOCNO_NUM (allocno); + if (ALLOCNO_MAX (allocno) < ALLOCNO_MIN (allocno)) + { + conflicts[num] = NULL; + continue; + } + conflict_bit_vec_words_num + = ((ALLOCNO_MAX (allocno) - ALLOCNO_MIN (allocno) + IRA_INT_BITS) + / IRA_INT_BITS); + allocated_words_num += conflict_bit_vec_words_num; + conflicts[num] + = (IRA_INT_TYPE *) ira_allocate (sizeof (IRA_INT_TYPE) + * conflict_bit_vec_words_num); + memset (conflicts[num], 0, + sizeof (IRA_INT_TYPE) * conflict_bit_vec_words_num); + } + if (internal_flag_ira_verbose > 0 && ira_dump_file != NULL) + fprintf + (ira_dump_file, + "+++Allocating %ld bytes for conflict table (uncompressed size %ld)\n", + (long) allocated_words_num * sizeof (IRA_INT_TYPE), + (long) allocno_set_words * ira_allocnos_num * sizeof (IRA_INT_TYPE)); + for (i = 0; i < ira_max_point; i++) + { + for (r = ira_start_point_ranges[i]; r != NULL; r = r->start_next) + { + allocno = r->allocno; + num = ALLOCNO_NUM (allocno); + id = ALLOCNO_CONFLICT_ID (allocno); + cover_class = ALLOCNO_COVER_CLASS (allocno); + sparseset_set_bit (allocnos_live, num); + EXECUTE_IF_SET_IN_SPARSESET (allocnos_live, j) + { + live_a = ira_allocnos[j]; + if (cover_class == ALLOCNO_COVER_CLASS (live_a) + /* Don't set up conflict for the allocno with itself. 
*/ + && num != (int) j) + { + SET_ALLOCNO_SET_BIT (conflicts[num], + ALLOCNO_CONFLICT_ID (live_a), + ALLOCNO_MIN (allocno), + ALLOCNO_MAX (allocno)); + SET_ALLOCNO_SET_BIT (conflicts[j], id, + ALLOCNO_MIN (live_a), + ALLOCNO_MAX (live_a)); + } + } + } + + for (r = ira_finish_point_ranges[i]; r != NULL; r = r->finish_next) + sparseset_clear_bit (allocnos_live, ALLOCNO_NUM (r->allocno)); + } + sparseset_free (allocnos_live); +} + + + +/* Return TRUE if the operand constraint STR is commutative. */ +static bool +commutative_constraint_p (const char *str) +{ + bool ignore_p; + int c; + + for (ignore_p = false;;) + { + c = *str; + if (c == '\0') + break; + str += CONSTRAINT_LEN (c, str); + if (c == '#') + ignore_p = true; + else if (c == ',') + ignore_p = false; + else if (! ignore_p) + { + /* Usually `%' is the first constraint character but the + documentation does not require this. */ + if (c == '%') + return true; + } + } + return false; +} + +/* Return the number of the operand which should be the same in any + case as operand with number OP_NUM (or negative value if there is + no such operand). If USE_COMMUT_OP_P is TRUE, the function makes + temporarily commutative operand exchange before this. The function + takes only really possible alternatives into consideration. 
*/ +static int +get_dup_num (int op_num, bool use_commut_op_p) +{ + int curr_alt, c, original, dup; + bool ignore_p, commut_op_used_p; + const char *str; + rtx op; + + if (op_num < 0 || recog_data.n_alternatives == 0) + return -1; + op = recog_data.operand[op_num]; + ira_assert (REG_P (op)); + commut_op_used_p = true; + if (use_commut_op_p) + { + if (commutative_constraint_p (recog_data.constraints[op_num])) + op_num++; + else if (op_num > 0 && commutative_constraint_p (recog_data.constraints + [op_num - 1])) + op_num--; + else + commut_op_used_p = false; + } + str = recog_data.constraints[op_num]; + for (ignore_p = false, original = -1, curr_alt = 0;;) + { + c = *str; + if (c == '\0') + break; + if (c == '#') + ignore_p = true; + else if (c == ',') + { + curr_alt++; + ignore_p = false; + } + else if (! ignore_p) + switch (c) + { + case 'X': + return -1; + + case 'm': + case 'o': + /* Accept a register which might be placed in memory. */ + return -1; + break; + + case 'V': + case '<': + case '>': + break; + + case 'p': + GO_IF_LEGITIMATE_ADDRESS (VOIDmode, op, win_p); + break; + + win_p: + return -1; + + case 'g': + return -1; + + case 'r': + case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': + case 'h': case 'j': case 'k': case 'l': + case 'q': case 't': case 'u': + case 'v': case 'w': case 'x': case 'y': case 'z': + case 'A': case 'B': case 'C': case 'D': + case 'Q': case 'R': case 'S': case 'T': case 'U': + case 'W': case 'Y': case 'Z': + { + enum reg_class cl; + + cl = (c == 'r' + ? 
GENERAL_REGS : REG_CLASS_FROM_CONSTRAINT (c, str)); + if (cl != NO_REGS) + return -1; +#ifdef EXTRA_CONSTRAINT_STR + else if (EXTRA_CONSTRAINT_STR (op, c, str)) + return -1; +#endif + break; + } + + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + if (original != -1 && original != c) + return -1; + original = c; + break; + } + str += CONSTRAINT_LEN (c, str); + } + if (original == -1) + return -1; + dup = original - '0'; + if (use_commut_op_p) + { + if (commutative_constraint_p (recog_data.constraints[dup])) + dup++; + else if (dup > 0 + && commutative_constraint_p (recog_data.constraints[dup -1])) + dup--; + else if (! commut_op_used_p) + return -1; + } + return dup; +} + +/* Return the operand which should be, in any case, the same as + operand with number OP_NUM. If USE_COMMUT_OP_P is TRUE, the + function makes temporarily commutative operand exchange before + this. */ +static rtx +get_dup (int op_num, bool use_commut_op_p) +{ + int n = get_dup_num (op_num, use_commut_op_p); + + if (n < 0) + return NULL_RTX; + else + return recog_data.operand[n]; +} + +/* Process registers REG1 and REG2 in move INSN with execution + frequency FREQ. The function also processes the registers in a + potential move insn (INSN == NULL in this case) with frequency + FREQ. The function can modify hard register costs of the + corresponding allocnos or create a copy involving the corresponding + allocnos. The function does nothing if the both registers are hard + registers. When nothing is changed, the function returns + FALSE. 
*/ +static bool +process_regs_for_copy (rtx reg1, rtx reg2, rtx insn, int freq) +{ + int hard_regno, cost, index; + ira_allocno_t a; + enum reg_class rclass, cover_class; + enum machine_mode mode; + ira_copy_t cp; + + gcc_assert (REG_P (reg1) && REG_P (reg2)); + if (HARD_REGISTER_P (reg1)) + { + if (HARD_REGISTER_P (reg2)) + return false; + hard_regno = REGNO (reg1); + a = ira_curr_regno_allocno_map[REGNO (reg2)]; + } + else if (HARD_REGISTER_P (reg2)) + { + hard_regno = REGNO (reg2); + a = ira_curr_regno_allocno_map[REGNO (reg1)]; + } + else if (!CONFLICT_ALLOCNO_P (ira_curr_regno_allocno_map[REGNO (reg1)], + ira_curr_regno_allocno_map[REGNO (reg2)])) + { + cp = ira_add_allocno_copy (ira_curr_regno_allocno_map[REGNO (reg1)], + ira_curr_regno_allocno_map[REGNO (reg2)], + freq, insn, ira_curr_loop_tree_node); + bitmap_set_bit (ira_curr_loop_tree_node->local_copies, cp->num); + return true; + } + else + return false; + rclass = REGNO_REG_CLASS (hard_regno); + mode = ALLOCNO_MODE (a); + cover_class = ALLOCNO_COVER_CLASS (a); + if (! ira_class_subset_p[rclass][cover_class]) + return false; + if (reg_class_size[rclass] <= (unsigned) CLASS_MAX_NREGS (rclass, mode)) + /* It is already taken into account in ira-costs.c. 
*/ + return false; + index = ira_class_hard_reg_index[cover_class][hard_regno]; + if (index < 0) + return false; + if (HARD_REGISTER_P (reg1)) + cost = ira_register_move_cost[mode][cover_class][rclass] * freq; + else + cost = ira_register_move_cost[mode][rclass][cover_class] * freq; + ira_allocate_and_set_costs + (&ALLOCNO_HARD_REG_COSTS (a), cover_class, + ALLOCNO_COVER_CLASS_COST (a)); + ira_allocate_and_set_costs + (&ALLOCNO_CONFLICT_HARD_REG_COSTS (a), cover_class, 0); + ALLOCNO_HARD_REG_COSTS (a)[index] -= cost; + ALLOCNO_CONFLICT_HARD_REG_COSTS (a)[index] -= cost; + return true; +} + +/* Process all of the output registers of the current insn and + the input register REG (its operand number OP_NUM) which dies in the + insn as if there were a move insn between them with frequency + FREQ. */ +static void +process_reg_shuffles (rtx reg, int op_num, int freq) +{ + int i; + rtx another_reg; + + gcc_assert (REG_P (reg)); + for (i = 0; i < recog_data.n_operands; i++) + { + another_reg = recog_data.operand[i]; + + if (!REG_P (another_reg) || op_num == i + || recog_data.operand_type[i] != OP_OUT) + continue; + + process_regs_for_copy (reg, another_reg, NULL_RTX, freq); + } +} + +/* Process INSN and create allocno copies if necessary. For example, + it might be because INSN is a pseudo-register move or INSN is two + operand insn. */ +static void +add_insn_allocno_copies (rtx insn) +{ + rtx set, operand, dup; + const char *str; + bool commut_p, bound_p; + int i, j, freq; + + freq = REG_FREQ_FROM_BB (BLOCK_FOR_INSN (insn)); + if (freq == 0) + freq = 1; + if ((set = single_set (insn)) != NULL_RTX + && REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)) + && ! 
side_effects_p (set) + && find_reg_note (insn, REG_DEAD, SET_SRC (set)) != NULL_RTX) + process_regs_for_copy (SET_DEST (set), SET_SRC (set), insn, freq); + else + { + extract_insn (insn); + for (i = 0; i < recog_data.n_operands; i++) + { + operand = recog_data.operand[i]; + if (REG_P (operand) + && find_reg_note (insn, REG_DEAD, operand) != NULL_RTX) + { + str = recog_data.constraints[i]; + while (*str == ' ' || *str == '\t') + str++; + bound_p = false; + for (j = 0, commut_p = false; j < 2; j++, commut_p = true) + if ((dup = get_dup (i, commut_p)) != NULL_RTX + && REG_P (dup) && GET_MODE (operand) == GET_MODE (dup) + && process_regs_for_copy (operand, dup, NULL_RTX, freq)) + bound_p = true; + if (bound_p) + continue; + /* If an operand dies, prefer its hard register for the + output operands by decreasing the hard register cost + or creating the corresponding allocno copies. The + cost will not correspond to a real move insn cost, so + make the frequency smaller. */ + process_reg_shuffles (operand, i, freq < 8 ? 1 : freq / 8); + } + } + } +} + +/* Add copies originated from BB given by LOOP_TREE_NODE. */ +static void +add_copies (ira_loop_tree_node_t loop_tree_node) +{ + basic_block bb; + rtx insn; + + bb = loop_tree_node->bb; + if (bb == NULL) + return; + FOR_BB_INSNS (bb, insn) + if (INSN_P (insn)) + add_insn_allocno_copies (insn); +} + +/* Propagate copies the corresponding allocnos on upper loop tree + level. 
*/ +static void +propagate_copies (void) +{ + ira_copy_t cp; + ira_copy_iterator ci; + ira_allocno_t a1, a2, parent_a1, parent_a2; + ira_loop_tree_node_t parent; + + FOR_EACH_COPY (cp, ci) + { + a1 = cp->first; + a2 = cp->second; + if (ALLOCNO_LOOP_TREE_NODE (a1) == ira_loop_tree_root) + continue; + ira_assert ((ALLOCNO_LOOP_TREE_NODE (a2) != ira_loop_tree_root)); + parent = ALLOCNO_LOOP_TREE_NODE (a1)->parent; + if ((parent_a1 = ALLOCNO_CAP (a1)) == NULL) + parent_a1 = parent->regno_allocno_map[ALLOCNO_REGNO (a1)]; + if ((parent_a2 = ALLOCNO_CAP (a2)) == NULL) + parent_a2 = parent->regno_allocno_map[ALLOCNO_REGNO (a2)]; + ira_assert (parent_a1 != NULL && parent_a2 != NULL); + if (! CONFLICT_ALLOCNO_P (parent_a1, parent_a2)) + ira_add_allocno_copy (parent_a1, parent_a2, cp->freq, + cp->insn, cp->loop_tree_node); + } +} + +/* Return TRUE if live ranges of allocnos A1 and A2 intersect. It is + used to find a conflict for new allocnos or allocnos with the + different cover classes. */ +bool +ira_allocno_live_ranges_intersect_p (ira_allocno_t a1, ira_allocno_t a2) +{ + allocno_live_range_t r1, r2; + + if (a1 == a2) + return false; + if (ALLOCNO_REG (a1) != NULL && ALLOCNO_REG (a2) != NULL + && (ORIGINAL_REGNO (ALLOCNO_REG (a1)) + == ORIGINAL_REGNO (ALLOCNO_REG (a2)))) + return false; + /* Remember the ranges are always kept ordered. */ + for (r1 = ALLOCNO_LIVE_RANGES (a1), r2 = ALLOCNO_LIVE_RANGES (a2); + r1 != NULL && r2 != NULL;) + { + if (r1->start > r2->finish) + r1 = r1->next; + else if (r2->start > r1->finish) + r2 = r2->next; + else + return true; + } + return false; +} + +/* Return TRUE if live ranges of pseudo-registers REGNO1 and REGNO2 + intersect. This should be used when there is only one region. + Currently this is used during reload. 
*/ +bool +ira_pseudo_live_ranges_intersect_p (int regno1, int regno2) +{ + ira_allocno_t a1, a2; + + ira_assert (regno1 >= FIRST_PSEUDO_REGISTER + && regno2 >= FIRST_PSEUDO_REGISTER); + /* Reg info caclulated by dataflow infrastructure can be different + from one calculated by regclass. */ + if ((a1 = ira_loop_tree_root->regno_allocno_map[regno1]) == NULL + || (a2 = ira_loop_tree_root->regno_allocno_map[regno2]) == NULL) + return false; + return ira_allocno_live_ranges_intersect_p (a1, a2); +} + +/* Array used to collect all conflict allocnos for given allocno. */ +static ira_allocno_t *collected_conflict_allocnos; + +/* Build conflict vectors or bit conflict vectors (whatever is more + profitable) for allocno A from the conflict table and propagate the + conflicts to upper level allocno. */ +static void +build_allocno_conflicts (ira_allocno_t a) +{ + int i, px, parent_num; + int conflict_bit_vec_words_num; + ira_loop_tree_node_t parent; + ira_allocno_t parent_a, another_a, another_parent_a; + ira_allocno_t *vec; + IRA_INT_TYPE *allocno_conflicts; + ira_allocno_set_iterator asi; + + allocno_conflicts = conflicts[ALLOCNO_NUM (a)]; + px = 0; + FOR_EACH_ALLOCNO_IN_SET (allocno_conflicts, + ALLOCNO_MIN (a), ALLOCNO_MAX (a), i, asi) + { + another_a = ira_conflict_id_allocno_map[i]; + ira_assert (ALLOCNO_COVER_CLASS (a) + == ALLOCNO_COVER_CLASS (another_a)); + collected_conflict_allocnos[px++] = another_a; + } + if (ira_conflict_vector_profitable_p (a, px)) + { + ira_allocate_allocno_conflict_vec (a, px); + vec = (ira_allocno_t*) ALLOCNO_CONFLICT_ALLOCNO_ARRAY (a); + memcpy (vec, collected_conflict_allocnos, sizeof (ira_allocno_t) * px); + vec[px] = NULL; + ALLOCNO_CONFLICT_ALLOCNOS_NUM (a) = px; + } + else + { + ALLOCNO_CONFLICT_ALLOCNO_ARRAY (a) = conflicts[ALLOCNO_NUM (a)]; + if (ALLOCNO_MAX (a) < ALLOCNO_MIN (a)) + conflict_bit_vec_words_num = 0; + else + conflict_bit_vec_words_num + = ((ALLOCNO_MAX (a) - ALLOCNO_MIN (a) + IRA_INT_BITS) + / IRA_INT_BITS); + 
ALLOCNO_CONFLICT_ALLOCNO_ARRAY_SIZE (a) + = conflict_bit_vec_words_num * sizeof (IRA_INT_TYPE); + } + parent = ALLOCNO_LOOP_TREE_NODE (a)->parent; + if ((parent_a = ALLOCNO_CAP (a)) == NULL + && (parent == NULL + || (parent_a = parent->regno_allocno_map[ALLOCNO_REGNO (a)]) + == NULL)) + return; + ira_assert (parent != NULL); + ira_assert (ALLOCNO_COVER_CLASS (a) == ALLOCNO_COVER_CLASS (parent_a)); + parent_num = ALLOCNO_NUM (parent_a); + FOR_EACH_ALLOCNO_IN_SET (allocno_conflicts, + ALLOCNO_MIN (a), ALLOCNO_MAX (a), i, asi) + { + another_a = ira_conflict_id_allocno_map[i]; + ira_assert (ALLOCNO_COVER_CLASS (a) + == ALLOCNO_COVER_CLASS (another_a)); + if ((another_parent_a = ALLOCNO_CAP (another_a)) == NULL + && (another_parent_a = (parent->regno_allocno_map + [ALLOCNO_REGNO (another_a)])) == NULL) + continue; + ira_assert (ALLOCNO_NUM (another_parent_a) >= 0); + ira_assert (ALLOCNO_COVER_CLASS (another_a) + == ALLOCNO_COVER_CLASS (another_parent_a)); + SET_ALLOCNO_SET_BIT (conflicts[parent_num], + ALLOCNO_CONFLICT_ID (another_parent_a), + ALLOCNO_MIN (parent_a), + ALLOCNO_MAX (parent_a)); + } +} + +/* Build conflict vectors or bit conflict vectors (whatever is more + profitable) of all allocnos from the conflict table. */ +static void +build_conflicts (void) +{ + int i; + ira_allocno_t a, cap; + + collected_conflict_allocnos + = (ira_allocno_t *) ira_allocate (sizeof (ira_allocno_t) + * ira_allocnos_num); + for (i = max_reg_num () - 1; i >= FIRST_PSEUDO_REGISTER; i--) + for (a = ira_regno_allocno_map[i]; + a != NULL; + a = ALLOCNO_NEXT_REGNO_ALLOCNO (a)) + { + build_allocno_conflicts (a); + for (cap = ALLOCNO_CAP (a); cap != NULL; cap = ALLOCNO_CAP (cap)) + build_allocno_conflicts (cap); + } + ira_free (collected_conflict_allocnos); +} + + + +/* Print hard reg set SET with TITLE to FILE. 
*/ +static void +print_hard_reg_set (FILE *file, const char *title, HARD_REG_SET set) +{ + int i, start; + + fputs (title, file); + for (start = -1, i = 0; i < FIRST_PSEUDO_REGISTER; i++) + { + if (TEST_HARD_REG_BIT (set, i)) + { + if (i == 0 || ! TEST_HARD_REG_BIT (set, i - 1)) + start = i; + } + if (start >= 0 + && (i == FIRST_PSEUDO_REGISTER - 1 || ! TEST_HARD_REG_BIT (set, i))) + { + if (start == i - 1) + fprintf (file, " %d", start); + else if (start == i - 2) + fprintf (file, " %d %d", start, start + 1); + else + fprintf (file, " %d-%d", start, i - 1); + start = -1; + } + } + fprintf (file, "\n"); +} + +/* Print information about allocno or only regno (if REG_P) conflicts + to FILE. */ +static void +print_conflicts (FILE *file, bool reg_p) +{ + ira_allocno_t a; + ira_allocno_iterator ai; + HARD_REG_SET conflicting_hard_regs; + + FOR_EACH_ALLOCNO (a, ai) + { + ira_allocno_t conflict_a; + ira_allocno_conflict_iterator aci; + basic_block bb; + + if (reg_p) + fprintf (file, ";; r%d", ALLOCNO_REGNO (a)); + else + { + fprintf (file, ";; a%d(r%d,", ALLOCNO_NUM (a), ALLOCNO_REGNO (a)); + if ((bb = ALLOCNO_LOOP_TREE_NODE (a)->bb) != NULL) + fprintf (file, "b%d", bb->index); + else + fprintf (file, "l%d", ALLOCNO_LOOP_TREE_NODE (a)->loop->num); + fprintf (file, ")"); + } + fprintf (file, " conflicts:"); + if (ALLOCNO_CONFLICT_ALLOCNO_ARRAY (a) != NULL) + FOR_EACH_ALLOCNO_CONFLICT (a, conflict_a, aci) + { + if (reg_p) + fprintf (file, " r%d,", ALLOCNO_REGNO (conflict_a)); + else + { + fprintf (file, " a%d(r%d,", ALLOCNO_NUM (conflict_a), + ALLOCNO_REGNO (conflict_a)); + if ((bb = ALLOCNO_LOOP_TREE_NODE (conflict_a)->bb) != NULL) + fprintf (file, "b%d)", bb->index); + else + fprintf (file, "l%d)", + ALLOCNO_LOOP_TREE_NODE (conflict_a)->loop->num); + } + } + COPY_HARD_REG_SET (conflicting_hard_regs, + ALLOCNO_TOTAL_CONFLICT_HARD_REGS (a)); + AND_COMPL_HARD_REG_SET (conflicting_hard_regs, ira_no_alloc_regs); + AND_HARD_REG_SET (conflicting_hard_regs, + 
reg_class_contents[ALLOCNO_COVER_CLASS (a)]); + print_hard_reg_set (file, "\n;; total conflict hard regs:", + conflicting_hard_regs); + COPY_HARD_REG_SET (conflicting_hard_regs, + ALLOCNO_CONFLICT_HARD_REGS (a)); + AND_COMPL_HARD_REG_SET (conflicting_hard_regs, ira_no_alloc_regs); + AND_HARD_REG_SET (conflicting_hard_regs, + reg_class_contents[ALLOCNO_COVER_CLASS (a)]); + print_hard_reg_set (file, ";; conflict hard regs:", + conflicting_hard_regs); + } + fprintf (file, "\n"); +} + +/* Print information about allocno or only regno (if REG_P) conflicts + to stderr. */ +void +ira_debug_conflicts (bool reg_p) +{ + print_conflicts (stderr, reg_p); +} + + + +/* Entry function which builds allocno conflicts and allocno copies + and accumulate some allocno info on upper level regions. */ +void +ira_build_conflicts (void) +{ + ira_allocno_t a; + ira_allocno_iterator ai; + + if (optimize) + { + build_conflict_bit_table (); + build_conflicts (); + ira_traverse_loop_tree (true, ira_loop_tree_root, NULL, add_copies); + /* We need finished conflict table for the subsequent call. */ + if (flag_ira_algorithm == IRA_ALGORITHM_REGIONAL + || flag_ira_algorithm == IRA_ALGORITHM_MIXED) + propagate_copies (); + /* Now we can free memory for the conflict table (see function + build_allocno_conflicts for details). */ + FOR_EACH_ALLOCNO (a, ai) + { + if (ALLOCNO_CONFLICT_ALLOCNO_ARRAY (a) != conflicts[ALLOCNO_NUM (a)]) + ira_free (conflicts[ALLOCNO_NUM (a)]); + } + ira_free (conflicts); + } + FOR_EACH_ALLOCNO (a, ai) + { + if (ALLOCNO_CALLS_CROSSED_NUM (a) == 0) + continue; + if (! 
flag_caller_saves) + { + IOR_HARD_REG_SET (ALLOCNO_TOTAL_CONFLICT_HARD_REGS (a), + call_used_reg_set); + if (ALLOCNO_CALLS_CROSSED_NUM (a) != 0) + IOR_HARD_REG_SET (ALLOCNO_CONFLICT_HARD_REGS (a), + call_used_reg_set); + } + else + { + IOR_HARD_REG_SET (ALLOCNO_TOTAL_CONFLICT_HARD_REGS (a), + no_caller_save_reg_set); + if (ALLOCNO_CALLS_CROSSED_NUM (a) != 0) + IOR_HARD_REG_SET (ALLOCNO_CONFLICT_HARD_REGS (a), + no_caller_save_reg_set); + } + } + if (optimize && internal_flag_ira_verbose > 2 && ira_dump_file != NULL) + print_conflicts (ira_dump_file, false); +} diff --git a/gcc/ira-costs.c b/gcc/ira-costs.c new file mode 100644 index 00000000000..774902035e1 --- /dev/null +++ b/gcc/ira-costs.c @@ -0,0 +1,1594 @@ +/* IRA hard register and memory cost calculation for allocnos. + Copyright (C) 2006, 2007, 2008 + Free Software Foundation, Inc. + Contributed by Vladimir Makarov <vmakarov@redhat.com>. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. 
*/ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "hard-reg-set.h" +#include "rtl.h" +#include "expr.h" +#include "tm_p.h" +#include "flags.h" +#include "basic-block.h" +#include "regs.h" +#include "addresses.h" +#include "insn-config.h" +#include "recog.h" +#include "toplev.h" +#include "target.h" +#include "params.h" +#include "ira-int.h" + +/* The file contains code is similar to one in regclass but the code + works on the allocno basis. */ + +#ifdef FORBIDDEN_INC_DEC_CLASSES +/* Indexed by n, is TRUE if allocno with number N is used in an + auto-inc or auto-dec context. */ +static bool *in_inc_dec; +#endif + +/* The `costs' struct records the cost of using hard registers of each + class considered for the calculation and of using memory for each + allocno. */ +struct costs +{ + int mem_cost; + /* Costs for register classes start here. We process only some + register classes (cover classes on the 1st cost calculation + iteration and important classes on the 2nd iteration). */ + int cost[1]; +}; + +/* Initialized once. It is a maximal possible size of the allocated + struct costs. */ +static int max_struct_costs_size; + +/* Allocated and initialized once, and used to initialize cost values + for each insn. */ +static struct costs *init_cost; + +/* Allocated once, and used for temporary purposes. */ +static struct costs *temp_costs; + +/* Allocated once, and used for the cost calculation. */ +static struct costs *op_costs[MAX_RECOG_OPERANDS]; +static struct costs *this_op_costs[MAX_RECOG_OPERANDS]; + +/* Original and accumulated costs of each class for each allocno. */ +static struct costs *allocno_costs, *total_costs; + +/* Classes used for cost calculation. They may be different on + different iterations of the cost calculations or in different + optimization modes. */ +static enum reg_class *cost_classes; + +/* The size of the previous array. 
*/ +static int cost_classes_num; + +/* Map: cost class -> order number (they start with 0) of the cost + class. */ +static int cost_class_nums[N_REG_CLASSES]; + +/* It is the current size of struct costs. */ +static int struct_costs_size; + +/* Return pointer to structure containing costs of allocno with given + NUM in array ARR. */ +#define COSTS_OF_ALLOCNO(arr, num) \ + ((struct costs *) ((char *) (arr) + (num) * struct_costs_size)) + +/* Record register class preferences of each allocno. Null value + means no preferences. It happens on the 1st iteration of the cost + calculation. */ +static enum reg_class *allocno_pref; + +/* Allocated buffers for allocno_pref. */ +static enum reg_class *allocno_pref_buffer; + +/* Execution frequency of the current insn. */ +static int frequency; + + + +/* Compute the cost of loading X into (if TO_P is TRUE) or from (if + TO_P is FALSE) a register of class RCLASS in mode MODE. X must not + be a pseudo register. */ +static int +copy_cost (rtx x, enum machine_mode mode, enum reg_class rclass, bool to_p, + secondary_reload_info *prev_sri) +{ + secondary_reload_info sri; + enum reg_class secondary_class = NO_REGS; + + /* If X is a SCRATCH, there is actually nothing to move since we are + assuming optimal allocation. */ + if (GET_CODE (x) == SCRATCH) + return 0; + + /* Get the class we will actually use for a reload. */ + rclass = PREFERRED_RELOAD_CLASS (x, rclass); + + /* If we need a secondary reload for an intermediate, the cost is + that to load the input into the intermediate register, then to + copy it. 
*/ + sri.prev_sri = prev_sri; + sri.extra_cost = 0; + secondary_class = targetm.secondary_reload (to_p, x, rclass, mode, &sri); + + if (ira_register_move_cost[mode] == NULL) + ira_init_register_move_cost (mode); + + if (secondary_class != NO_REGS) + return (move_cost[mode][secondary_class][rclass] + sri.extra_cost + + copy_cost (x, mode, secondary_class, to_p, &sri)); + + /* For memory, use the memory move cost, for (hard) registers, use + the cost to move between the register classes, and use 2 for + everything else (constants). */ + if (MEM_P (x) || rclass == NO_REGS) + return sri.extra_cost + ira_memory_move_cost[mode][rclass][to_p != 0]; + else if (REG_P (x)) + return + (sri.extra_cost + move_cost[mode][REGNO_REG_CLASS (REGNO (x))][rclass]); + else + /* If this is a constant, we may eventually want to call rtx_cost + here. */ + return sri.extra_cost + COSTS_N_INSNS (1); +} + + + +/* Record the cost of using memory or hard registers of various + classes for the operands in INSN. + + N_ALTS is the number of alternatives. + N_OPS is the number of operands. + OPS is an array of the operands. + MODES are the modes of the operands, in case any are VOIDmode. + CONSTRAINTS are the constraints to use for the operands. This array + is modified by this procedure. + + This procedure works alternative by alternative. For each + alternative we assume that we will be able to allocate all allocnos + to their ideal register class and calculate the cost of using that + alternative. Then we compute, for each operand that is a + pseudo-register, the cost of having the allocno allocated to each + register class and using it in that alternative. To this cost is + added the cost of the alternative. + + The cost of each class for this insn is its lowest cost among all + the alternatives. 
*/ +static void +record_reg_classes (int n_alts, int n_ops, rtx *ops, + enum machine_mode *modes, const char **constraints, + rtx insn, struct costs **op_costs, + enum reg_class *allocno_pref) +{ + int alt; + int i, j, k; + rtx set; + + /* Process each alternative, each time minimizing an operand's cost + with the cost for each operand in that alternative. */ + for (alt = 0; alt < n_alts; alt++) + { + enum reg_class classes[MAX_RECOG_OPERANDS]; + int allows_mem[MAX_RECOG_OPERANDS]; + int rclass; + int alt_fail = 0; + int alt_cost = 0, op_cost_add; + + for (i = 0; i < n_ops; i++) + { + unsigned char c; + const char *p = constraints[i]; + rtx op = ops[i]; + enum machine_mode mode = modes[i]; + int allows_addr = 0; + int win = 0; + + /* Initially show we know nothing about the register class. */ + classes[i] = NO_REGS; + allows_mem[i] = 0; + + /* If this operand has no constraints at all, we can + conclude nothing about it since anything is valid. */ + if (*p == 0) + { + if (REG_P (op) && REGNO (op) >= FIRST_PSEUDO_REGISTER) + memset (this_op_costs[i], 0, struct_costs_size); + continue; + } + + /* If this alternative is only relevant when this operand + matches a previous operand, we do different things + depending on whether this operand is a allocno-reg or not. + We must process any modifiers for the operand before we + can make this test. */ + while (*p == '%' || *p == '=' || *p == '+' || *p == '&') + p++; + + if (p[0] >= '0' && p[0] <= '0' + i && (p[1] == ',' || p[1] == 0)) + { + /* Copy class and whether memory is allowed from the + matching alternative. Then perform any needed cost + computations and/or adjustments. */ + j = p[0] - '0'; + classes[i] = classes[j]; + allows_mem[i] = allows_mem[j]; + + if (! REG_P (op) || REGNO (op) < FIRST_PSEUDO_REGISTER) + { + /* If this matches the other operand, we have no + added cost and we win. 
*/ + if (rtx_equal_p (ops[j], op)) + win = 1; + /* If we can put the other operand into a register, + add to the cost of this alternative the cost to + copy this operand to the register used for the + other operand. */ + else if (classes[j] != NO_REGS) + { + alt_cost += copy_cost (op, mode, classes[j], 1, NULL); + win = 1; + } + } + else if (! REG_P (ops[j]) + || REGNO (ops[j]) < FIRST_PSEUDO_REGISTER) + { + /* This op is an allocno but the one it matches is + not. */ + + /* If we can't put the other operand into a + register, this alternative can't be used. */ + + if (classes[j] == NO_REGS) + alt_fail = 1; + /* Otherwise, add to the cost of this alternative + the cost to copy the other operand to the hard + register used for this operand. */ + else + alt_cost += copy_cost (ops[j], mode, classes[j], 1, NULL); + } + else + { + /* The costs of this operand are not the same as the + other operand since move costs are not symmetric. + Moreover, if we cannot tie them, this alternative + needs to do a copy, which is one insn. */ + struct costs *pp = this_op_costs[i]; + + if (ira_register_move_cost[mode] == NULL) + ira_init_register_move_cost (mode); + + for (k = 0; k < cost_classes_num; k++) + { + rclass = cost_classes[k]; + pp->cost[k] + = ((recog_data.operand_type[i] != OP_OUT + ? ira_may_move_in_cost[mode][rclass] + [classes[i]] * frequency : 0) + + (recog_data.operand_type[i] != OP_IN + ? ira_may_move_out_cost[mode][classes[i]] + [rclass] * frequency : 0)); + } + + /* If the alternative actually allows memory, make + things a bit cheaper since we won't need an extra + insn to load it. */ + pp->mem_cost + = ((recog_data.operand_type[i] != OP_IN + ? ira_memory_move_cost[mode][classes[i]][0] : 0) + + (recog_data.operand_type[i] != OP_OUT + ? 
ira_memory_move_cost[mode][classes[i]][1] : 0) + - allows_mem[i]) * frequency; + /* If we have assigned a class to this allocno in our + first pass, add a cost to this alternative + corresponding to what we would add if this allocno + were not in the appropriate class. We could use + cover class here but it is less accurate + approximation. */ + if (allocno_pref) + { + enum reg_class pref_class + = allocno_pref[ALLOCNO_NUM + (ira_curr_regno_allocno_map + [REGNO (op)])]; + + if (pref_class == NO_REGS) + alt_cost + += ((recog_data.operand_type[i] != OP_IN + ? ira_memory_move_cost[mode][classes[i]][0] + : 0) + + (recog_data.operand_type[i] != OP_OUT + ? ira_memory_move_cost[mode][classes[i]][1] + : 0)); + else if (ira_reg_class_intersect + [pref_class][classes[i]] == NO_REGS) + alt_cost += (ira_register_move_cost + [mode][pref_class][classes[i]]); + } + if (REGNO (ops[i]) != REGNO (ops[j]) + && ! find_reg_note (insn, REG_DEAD, op)) + alt_cost += 2; + + /* This is in place of ordinary cost computation for + this operand, so skip to the end of the + alternative (should be just one character). */ + while (*p && *p++ != ',') + ; + + constraints[i] = p; + continue; + } + } + + /* Scan all the constraint letters. See if the operand + matches any of the constraints. Collect the valid + register classes and see if this operand accepts + memory. */ + while ((c = *p)) + { + switch (c) + { + case ',': + break; + case '*': + /* Ignore the next letter for this pass. */ + c = *++p; + break; + + case '?': + alt_cost += 2; + case '!': case '#': case '&': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + break; + + case 'p': + allows_addr = 1; + win = address_operand (op, GET_MODE (op)); + /* We know this operand is an address, so we want it + to be allocated to a register that can be the + base of an address, i.e. BASE_REG_CLASS. 
*/ + classes[i] + = ira_reg_class_union[classes[i]] + [base_reg_class (VOIDmode, ADDRESS, SCRATCH)]; + break; + + case 'm': case 'o': case 'V': + /* It doesn't seem worth distinguishing between + offsettable and non-offsettable addresses + here. */ + allows_mem[i] = 1; + if (MEM_P (op)) + win = 1; + break; + + case '<': + if (MEM_P (op) + && (GET_CODE (XEXP (op, 0)) == PRE_DEC + || GET_CODE (XEXP (op, 0)) == POST_DEC)) + win = 1; + break; + + case '>': + if (MEM_P (op) + && (GET_CODE (XEXP (op, 0)) == PRE_INC + || GET_CODE (XEXP (op, 0)) == POST_INC)) + win = 1; + break; + + case 'E': + case 'F': + if (GET_CODE (op) == CONST_DOUBLE + || (GET_CODE (op) == CONST_VECTOR + && (GET_MODE_CLASS (GET_MODE (op)) + == MODE_VECTOR_FLOAT))) + win = 1; + break; + + case 'G': + case 'H': + if (GET_CODE (op) == CONST_DOUBLE + && CONST_DOUBLE_OK_FOR_CONSTRAINT_P (op, c, p)) + win = 1; + break; + + case 's': + if (GET_CODE (op) == CONST_INT + || (GET_CODE (op) == CONST_DOUBLE + && GET_MODE (op) == VOIDmode)) + break; + + case 'i': + if (CONSTANT_P (op) + && (! flag_pic || LEGITIMATE_PIC_OPERAND_P (op))) + win = 1; + break; + + case 'n': + if (GET_CODE (op) == CONST_INT + || (GET_CODE (op) == CONST_DOUBLE + && GET_MODE (op) == VOIDmode)) + win = 1; + break; + + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + if (GET_CODE (op) == CONST_INT + && CONST_OK_FOR_CONSTRAINT_P (INTVAL (op), c, p)) + win = 1; + break; + + case 'X': + win = 1; + break; + + case 'g': + if (MEM_P (op) + || (CONSTANT_P (op) + && (! 
flag_pic || LEGITIMATE_PIC_OPERAND_P (op)))) + win = 1; + allows_mem[i] = 1; + case 'r': + classes[i] = ira_reg_class_union[classes[i]][GENERAL_REGS]; + break; + + default: + if (REG_CLASS_FROM_CONSTRAINT (c, p) != NO_REGS) + classes[i] = ira_reg_class_union[classes[i]] + [REG_CLASS_FROM_CONSTRAINT (c, p)]; +#ifdef EXTRA_CONSTRAINT_STR + else if (EXTRA_CONSTRAINT_STR (op, c, p)) + win = 1; + + if (EXTRA_MEMORY_CONSTRAINT (c, p)) + { + /* Every MEM can be reloaded to fit. */ + allows_mem[i] = 1; + if (MEM_P (op)) + win = 1; + } + if (EXTRA_ADDRESS_CONSTRAINT (c, p)) + { + /* Every address can be reloaded to fit. */ + allows_addr = 1; + if (address_operand (op, GET_MODE (op))) + win = 1; + /* We know this operand is an address, so we + want it to be allocated to a hard register + that can be the base of an address, + i.e. BASE_REG_CLASS. */ + classes[i] + = ira_reg_class_union[classes[i]] + [base_reg_class (VOIDmode, ADDRESS, SCRATCH)]; + } +#endif + break; + } + p += CONSTRAINT_LEN (c, p); + if (c == ',') + break; + } + + constraints[i] = p; + + /* How we account for this operand now depends on whether it + is a pseudo register or not. If it is, we first check if + any register classes are valid. If not, we ignore this + alternative, since we want to assume that all allocnos get + allocated for register preferencing. If some register + class is valid, compute the costs of moving the allocno + into that class. */ + if (REG_P (op) && REGNO (op) >= FIRST_PSEUDO_REGISTER) + { + if (classes[i] == NO_REGS) + { + /* We must always fail if the operand is a REG, but + we did not find a suitable class. + + Otherwise we may perform an uninitialized read + from this_op_costs after the `continue' statement + below. 
*/ + alt_fail = 1; + } + else + { + struct costs *pp = this_op_costs[i]; + + if (ira_register_move_cost[mode] == NULL) + ira_init_register_move_cost (mode); + + for (k = 0; k < cost_classes_num; k++) + { + rclass = cost_classes[k]; + pp->cost[k] + = ((recog_data.operand_type[i] != OP_OUT + ? ira_may_move_in_cost[mode][rclass] + [classes[i]] * frequency : 0) + + (recog_data.operand_type[i] != OP_IN + ? ira_may_move_out_cost[mode][classes[i]] + [rclass] * frequency : 0)); + } + + /* If the alternative actually allows memory, make + things a bit cheaper since we won't need an extra + insn to load it. */ + pp->mem_cost + = ((recog_data.operand_type[i] != OP_IN + ? ira_memory_move_cost[mode][classes[i]][0] : 0) + + (recog_data.operand_type[i] != OP_OUT + ? ira_memory_move_cost[mode][classes[i]][1] : 0) + - allows_mem[i]) * frequency; + /* If we have assigned a class to this allocno in our + first pass, add a cost to this alternative + corresponding to what we would add if this allocno + were not in the appropriate class. We could use + cover class here but it is less accurate + approximation. */ + if (allocno_pref) + { + enum reg_class pref_class + = allocno_pref[ALLOCNO_NUM + (ira_curr_regno_allocno_map + [REGNO (op)])]; + + if (pref_class == NO_REGS) + alt_cost + += ((recog_data.operand_type[i] != OP_IN + ? ira_memory_move_cost[mode][classes[i]][0] + : 0) + + (recog_data.operand_type[i] != OP_OUT + ? ira_memory_move_cost[mode][classes[i]][1] + : 0)); + else if (ira_reg_class_intersect[pref_class][classes[i]] + == NO_REGS) + alt_cost += (ira_register_move_cost + [mode][pref_class][classes[i]]); + } + } + } + + /* Otherwise, if this alternative wins, either because we + have already determined that or if we have a hard + register of the proper class, there is no cost for this + alternative. 
*/ + else if (win || (REG_P (op) + && reg_fits_class_p (op, classes[i], + 0, GET_MODE (op)))) + ; + + /* If registers are valid, the cost of this alternative + includes copying the object to and/or from a + register. */ + else if (classes[i] != NO_REGS) + { + if (recog_data.operand_type[i] != OP_OUT) + alt_cost += copy_cost (op, mode, classes[i], 1, NULL); + + if (recog_data.operand_type[i] != OP_IN) + alt_cost += copy_cost (op, mode, classes[i], 0, NULL); + } + /* The only other way this alternative can be used is if + this is a constant that could be placed into memory. */ + else if (CONSTANT_P (op) && (allows_addr || allows_mem[i])) + alt_cost += ira_memory_move_cost[mode][classes[i]][1]; + else + alt_fail = 1; + } + + if (alt_fail) + continue; + + op_cost_add = alt_cost * frequency; + /* Finally, update the costs with the information we've + calculated about this alternative. */ + for (i = 0; i < n_ops; i++) + if (REG_P (ops[i]) && REGNO (ops[i]) >= FIRST_PSEUDO_REGISTER) + { + struct costs *pp = op_costs[i], *qq = this_op_costs[i]; + int scale = 1 + (recog_data.operand_type[i] == OP_INOUT); + + pp->mem_cost = MIN (pp->mem_cost, + (qq->mem_cost + op_cost_add) * scale); + + for (k = 0; k < cost_classes_num; k++) + pp->cost[k] + = MIN (pp->cost[k], (qq->cost[k] + op_cost_add) * scale); + } + } + + /* If this insn is a single set copying operand 1 to operand 0 and + one operand is an allocno with the other a hard reg or an allocno + that prefers a hard register that is in its own register class + then we may want to adjust the cost of that register class to -1. + + Avoid the adjustment if the source does not die to avoid + stressing of register allocator by preferrencing two colliding + registers into single class. + + Also avoid the adjustment if a copy between hard registers of the + class is expensive (ten times the cost of a default copy is + considered arbitrarily expensive). 
This avoids losing when the + preferred class is very expensive as the source of a copy + instruction. */ + if ((set = single_set (insn)) != 0 + && ops[0] == SET_DEST (set) && ops[1] == SET_SRC (set) + && REG_P (ops[0]) && REG_P (ops[1]) + && find_regno_note (insn, REG_DEAD, REGNO (ops[1]))) + for (i = 0; i <= 1; i++) + if (REGNO (ops[i]) >= FIRST_PSEUDO_REGISTER) + { + unsigned int regno = REGNO (ops[!i]); + enum machine_mode mode = GET_MODE (ops[!i]); + int rclass; + unsigned int nr; + + if (regno < FIRST_PSEUDO_REGISTER) + for (k = 0; k < cost_classes_num; k++) + { + rclass = cost_classes[k]; + if (TEST_HARD_REG_BIT (reg_class_contents[rclass], regno) + && (reg_class_size[rclass] + == (unsigned) CLASS_MAX_NREGS (rclass, mode))) + { + if (reg_class_size[rclass] == 1) + op_costs[i]->cost[k] = -frequency; + else + { + for (nr = 0; + nr < (unsigned) hard_regno_nregs[regno][mode]; + nr++) + if (! TEST_HARD_REG_BIT (reg_class_contents[rclass], + regno + nr)) + break; + + if (nr == (unsigned) hard_regno_nregs[regno][mode]) + op_costs[i]->cost[k] = -frequency; + } + } + } + } +} + + + +/* Wrapper around REGNO_OK_FOR_INDEX_P, to allow pseudo registers. */ +static inline bool +ok_for_index_p_nonstrict (rtx reg) +{ + unsigned regno = REGNO (reg); + + return regno >= FIRST_PSEUDO_REGISTER || REGNO_OK_FOR_INDEX_P (regno); +} + +/* A version of regno_ok_for_base_p for use here, when all + pseudo-registers should count as OK. Arguments as for + regno_ok_for_base_p. */ +static inline bool +ok_for_base_p_nonstrict (rtx reg, enum machine_mode mode, + enum rtx_code outer_code, enum rtx_code index_code) +{ + unsigned regno = REGNO (reg); + + if (regno >= FIRST_PSEUDO_REGISTER) + return true; + return ok_for_base_p_1 (regno, mode, outer_code, index_code); +} + +/* Record the pseudo registers we must reload into hard registers in a + subexpression of a memory address, X. + + If CONTEXT is 0, we are looking at the base part of an address, + otherwise we are looking at the index part. 
+ + MODE is the mode of the memory reference; OUTER_CODE and INDEX_CODE + give the context that the rtx appears in. These three arguments + are passed down to base_reg_class. + + SCALE is twice the amount to multiply the cost by (it is twice so + we can represent half-cost adjustments). */ +static void +record_address_regs (enum machine_mode mode, rtx x, int context, + enum rtx_code outer_code, enum rtx_code index_code, + int scale) +{ + enum rtx_code code = GET_CODE (x); + enum reg_class rclass; + + if (context == 1) + rclass = INDEX_REG_CLASS; + else + rclass = base_reg_class (mode, outer_code, index_code); + + switch (code) + { + case CONST_INT: + case CONST: + case CC0: + case PC: + case SYMBOL_REF: + case LABEL_REF: + return; + + case PLUS: + /* When we have an address that is a sum, we must determine + whether registers are "base" or "index" regs. If there is a + sum of two registers, we must choose one to be the "base". + Luckily, we can use the REG_POINTER to make a good choice + most of the time. We only need to do this on machines that + can have two registers in an address and where the base and + index register classes are different. + + ??? This code used to set REGNO_POINTER_FLAG in some cases, + but that seems bogus since it should only be set when we are + sure the register is being used as a pointer. */ + { + rtx arg0 = XEXP (x, 0); + rtx arg1 = XEXP (x, 1); + enum rtx_code code0 = GET_CODE (arg0); + enum rtx_code code1 = GET_CODE (arg1); + + /* Look inside subregs. */ + if (code0 == SUBREG) + arg0 = SUBREG_REG (arg0), code0 = GET_CODE (arg0); + if (code1 == SUBREG) + arg1 = SUBREG_REG (arg1), code1 = GET_CODE (arg1); + + /* If this machine only allows one register per address, it + must be in the first operand. */ + if (MAX_REGS_PER_ADDRESS == 1) + record_address_regs (mode, arg0, 0, PLUS, code1, scale); + + /* If index and base registers are the same on this machine, + just record registers in any non-constant operands. 
We + assume here, as well as in the tests below, that all + addresses are in canonical form. */ + else if (INDEX_REG_CLASS == base_reg_class (VOIDmode, PLUS, SCRATCH)) + { + record_address_regs (mode, arg0, context, PLUS, code1, scale); + if (! CONSTANT_P (arg1)) + record_address_regs (mode, arg1, context, PLUS, code0, scale); + } + + /* If the second operand is a constant integer, it doesn't + change what class the first operand must be. */ + else if (code1 == CONST_INT || code1 == CONST_DOUBLE) + record_address_regs (mode, arg0, context, PLUS, code1, scale); + /* If the second operand is a symbolic constant, the first + operand must be an index register. */ + else if (code1 == SYMBOL_REF || code1 == CONST || code1 == LABEL_REF) + record_address_regs (mode, arg0, 1, PLUS, code1, scale); + /* If both operands are registers but one is already a hard + register of index or reg-base class, give the other the + class that the hard register is not. */ + else if (code0 == REG && code1 == REG + && REGNO (arg0) < FIRST_PSEUDO_REGISTER + && (ok_for_base_p_nonstrict (arg0, mode, PLUS, REG) + || ok_for_index_p_nonstrict (arg0))) + record_address_regs (mode, arg1, + ok_for_base_p_nonstrict (arg0, mode, PLUS, REG) + ? 1 : 0, + PLUS, REG, scale); + else if (code0 == REG && code1 == REG + && REGNO (arg1) < FIRST_PSEUDO_REGISTER + && (ok_for_base_p_nonstrict (arg1, mode, PLUS, REG) + || ok_for_index_p_nonstrict (arg1))) + record_address_regs (mode, arg0, + ok_for_base_p_nonstrict (arg1, mode, PLUS, REG) + ? 1 : 0, + PLUS, REG, scale); + /* If one operand is known to be a pointer, it must be the + base with the other operand the index. Likewise if the + other operand is a MULT. 
*/ + else if ((code0 == REG && REG_POINTER (arg0)) || code1 == MULT) + { + record_address_regs (mode, arg0, 0, PLUS, code1, scale); + record_address_regs (mode, arg1, 1, PLUS, code0, scale); + } + else if ((code1 == REG && REG_POINTER (arg1)) || code0 == MULT) + { + record_address_regs (mode, arg0, 1, PLUS, code1, scale); + record_address_regs (mode, arg1, 0, PLUS, code0, scale); + } + /* Otherwise, count equal chances that each might be a base or + index register. This case should be rare. */ + else + { + record_address_regs (mode, arg0, 0, PLUS, code1, scale / 2); + record_address_regs (mode, arg0, 1, PLUS, code1, scale / 2); + record_address_regs (mode, arg1, 0, PLUS, code0, scale / 2); + record_address_regs (mode, arg1, 1, PLUS, code0, scale / 2); + } + } + break; + + /* Double the importance of an allocno that is incremented or + decremented, since it would take two extra insns if it ends + up in the wrong place. */ + case POST_MODIFY: + case PRE_MODIFY: + record_address_regs (mode, XEXP (x, 0), 0, code, + GET_CODE (XEXP (XEXP (x, 1), 1)), 2 * scale); + if (REG_P (XEXP (XEXP (x, 1), 1))) + record_address_regs (mode, XEXP (XEXP (x, 1), 1), 1, code, REG, + 2 * scale); + break; + + case POST_INC: + case PRE_INC: + case POST_DEC: + case PRE_DEC: + /* Double the importance of an allocno that is incremented or + decremented, since it would take two extra insns if it ends + up in the wrong place. If the operand is a pseudo-register, + show it is being used in an INC_DEC context. 
*/ +#ifdef FORBIDDEN_INC_DEC_CLASSES + if (REG_P (XEXP (x, 0)) + && REGNO (XEXP (x, 0)) >= FIRST_PSEUDO_REGISTER) + in_inc_dec[ALLOCNO_NUM (ira_curr_regno_allocno_map + [REGNO (XEXP (x, 0))])] = true; +#endif + record_address_regs (mode, XEXP (x, 0), 0, code, SCRATCH, 2 * scale); + break; + + case REG: + { + struct costs *pp; + int i, k; + + if (REGNO (x) < FIRST_PSEUDO_REGISTER) + break; + + pp = COSTS_OF_ALLOCNO (allocno_costs, + ALLOCNO_NUM (ira_curr_regno_allocno_map + [REGNO (x)])); + pp->mem_cost += (ira_memory_move_cost[Pmode][rclass][1] * scale) / 2; + if (ira_register_move_cost[Pmode] == NULL) + ira_init_register_move_cost (Pmode); + for (k = 0; k < cost_classes_num; k++) + { + i = cost_classes[k]; + pp->cost[k] + += (ira_may_move_in_cost[Pmode][i][rclass] * scale) / 2; + } + } + break; + + default: + { + const char *fmt = GET_RTX_FORMAT (code); + int i; + for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--) + if (fmt[i] == 'e') + record_address_regs (mode, XEXP (x, i), context, code, SCRATCH, + scale); + } + } +} + + + +/* Calculate the costs of insn operands. */ +static void +record_operand_costs (rtx insn, struct costs **op_costs, + enum reg_class *allocno_pref) +{ + const char *constraints[MAX_RECOG_OPERANDS]; + enum machine_mode modes[MAX_RECOG_OPERANDS]; + int i; + + for (i = 0; i < recog_data.n_operands; i++) + { + constraints[i] = recog_data.constraints[i]; + modes[i] = recog_data.operand_mode[i]; + } + + /* If we get here, we are set up to record the costs of all the + operands for this insn. Start by initializing the costs. Then + handle any address registers. Finally record the desired classes + for any allocnos, doing it twice if some pair of operands are + commutative. 
*/ + for (i = 0; i < recog_data.n_operands; i++) + { + memcpy (op_costs[i], init_cost, struct_costs_size); + + if (GET_CODE (recog_data.operand[i]) == SUBREG) + recog_data.operand[i] = SUBREG_REG (recog_data.operand[i]); + + if (MEM_P (recog_data.operand[i])) + record_address_regs (GET_MODE (recog_data.operand[i]), + XEXP (recog_data.operand[i], 0), + 0, MEM, SCRATCH, frequency * 2); + else if (constraints[i][0] == 'p' + || EXTRA_ADDRESS_CONSTRAINT (constraints[i][0], + constraints[i])) + record_address_regs (VOIDmode, recog_data.operand[i], 0, ADDRESS, + SCRATCH, frequency * 2); + } + + /* Check for commutative in a separate loop so everything will have + been initialized. We must do this even if one operand is a + constant--see addsi3 in m68k.md. */ + for (i = 0; i < (int) recog_data.n_operands - 1; i++) + if (constraints[i][0] == '%') + { + const char *xconstraints[MAX_RECOG_OPERANDS]; + int j; + + /* Handle commutative operands by swapping the constraints. + We assume the modes are the same. */ + for (j = 0; j < recog_data.n_operands; j++) + xconstraints[j] = constraints[j]; + + xconstraints[i] = constraints[i+1]; + xconstraints[i+1] = constraints[i]; + record_reg_classes (recog_data.n_alternatives, recog_data.n_operands, + recog_data.operand, modes, + xconstraints, insn, op_costs, allocno_pref); + } + record_reg_classes (recog_data.n_alternatives, recog_data.n_operands, + recog_data.operand, modes, + constraints, insn, op_costs, allocno_pref); +} + + + +/* Process one insn INSN. Scan it and record each time it would save + code to put a certain allocnos in a certain class. Return the last + insn processed, so that the scan can be continued from there. 
*/ +static rtx +scan_one_insn (rtx insn) +{ + enum rtx_code pat_code; + rtx set, note; + int i, k; + + if (!INSN_P (insn)) + return insn; + + pat_code = GET_CODE (PATTERN (insn)); + if (pat_code == USE || pat_code == CLOBBER || pat_code == ASM_INPUT + || pat_code == ADDR_VEC || pat_code == ADDR_DIFF_VEC) + return insn; + + set = single_set (insn); + extract_insn (insn); + + /* If this insn loads a parameter from its stack slot, then it + represents a savings, rather than a cost, if the parameter is + stored in memory. Record this fact. */ + if (set != 0 && REG_P (SET_DEST (set)) && MEM_P (SET_SRC (set)) + && (note = find_reg_note (insn, REG_EQUIV, NULL_RTX)) != NULL_RTX + && MEM_P (XEXP (note, 0))) + { + COSTS_OF_ALLOCNO (allocno_costs, + ALLOCNO_NUM (ira_curr_regno_allocno_map + [REGNO (SET_DEST (set))]))->mem_cost + -= (ira_memory_move_cost[GET_MODE (SET_DEST (set))][GENERAL_REGS][1] + * frequency); + record_address_regs (GET_MODE (SET_SRC (set)), XEXP (SET_SRC (set), 0), + 0, MEM, SCRATCH, frequency * 2); + } + + record_operand_costs (insn, op_costs, allocno_pref); + + /* Now add the cost for each operand to the total costs for its + allocno. */ + for (i = 0; i < recog_data.n_operands; i++) + if (REG_P (recog_data.operand[i]) + && REGNO (recog_data.operand[i]) >= FIRST_PSEUDO_REGISTER) + { + int regno = REGNO (recog_data.operand[i]); + struct costs *p + = COSTS_OF_ALLOCNO (allocno_costs, + ALLOCNO_NUM (ira_curr_regno_allocno_map[regno])); + struct costs *q = op_costs[i]; + + p->mem_cost += q->mem_cost; + for (k = 0; k < cost_classes_num; k++) + p->cost[k] += q->cost[k]; + } + + return insn; +} + + + +/* Print allocnos costs to file F. 
*/ +static void +print_costs (FILE *f) +{ + int k; + ira_allocno_t a; + ira_allocno_iterator ai; + + fprintf (f, "\n"); + FOR_EACH_ALLOCNO (a, ai) + { + int i, rclass; + basic_block bb; + int regno = ALLOCNO_REGNO (a); + + i = ALLOCNO_NUM (a); + fprintf (f, " a%d(r%d,", i, regno); + if ((bb = ALLOCNO_LOOP_TREE_NODE (a)->bb) != NULL) + fprintf (f, "b%d", bb->index); + else + fprintf (f, "l%d", ALLOCNO_LOOP_TREE_NODE (a)->loop->num); + fprintf (f, ") costs:"); + for (k = 0; k < cost_classes_num; k++) + { + rclass = cost_classes[k]; + if (contains_reg_of_mode[rclass][PSEUDO_REGNO_MODE (regno)] +#ifdef FORBIDDEN_INC_DEC_CLASSES + && (! in_inc_dec[i] || ! forbidden_inc_dec_class[rclass]) +#endif +#ifdef CANNOT_CHANGE_MODE_CLASS + && ! invalid_mode_change_p (regno, (enum reg_class) rclass, + PSEUDO_REGNO_MODE (regno)) +#endif + ) + { + fprintf (f, " %s:%d", reg_class_names[rclass], + COSTS_OF_ALLOCNO (allocno_costs, i)->cost[k]); + if (flag_ira_algorithm == IRA_ALGORITHM_REGIONAL + || flag_ira_algorithm == IRA_ALGORITHM_MIXED) + fprintf (f, ",%d", COSTS_OF_ALLOCNO (total_costs, i)->cost[k]); + } + } + fprintf (f, " MEM:%i\n", COSTS_OF_ALLOCNO (allocno_costs, i)->mem_cost); + } +} + +/* Traverse the BB represented by LOOP_TREE_NODE to update the allocno + costs. */ +static void +process_bb_node_for_costs (ira_loop_tree_node_t loop_tree_node) +{ + basic_block bb; + rtx insn; + + bb = loop_tree_node->bb; + if (bb == NULL) + return; + frequency = REG_FREQ_FROM_BB (bb); + if (frequency == 0) + frequency = 1; + FOR_BB_INSNS (bb, insn) + insn = scan_one_insn (insn); +} + +/* Find costs of register classes and memory for allocnos and their + best costs. 
*/ +static void +find_allocno_class_costs (void) +{ + int i, k; + int pass; + basic_block bb; + + init_recog (); +#ifdef FORBIDDEN_INC_DEC_CLASSES + in_inc_dec = ira_allocate (sizeof (bool) * ira_allocnos_num); +#endif /* FORBIDDEN_INC_DEC_CLASSES */ + allocno_pref = NULL; + /* Normally we scan the insns once and determine the best class to + use for each allocno. However, if -fexpensive-optimizations are + on, we do so twice, the second time using the tentative best + classes to guide the selection. */ + for (pass = 0; pass <= flag_expensive_optimizations; pass++) + { + if (internal_flag_ira_verbose > 0 && ira_dump_file) + fprintf (ira_dump_file, "\nPass %i for finding allocno costs\n\n", + pass); + /* We could use only cover classes. Unfortunately it does not + work well for some targets where some subclass of cover class + is costly and wrong cover class is chosen. */ + for (cost_classes_num = 0; + cost_classes_num < ira_important_classes_num; + cost_classes_num++) + { + cost_classes[cost_classes_num] + = ira_important_classes[cost_classes_num]; + cost_class_nums[cost_classes[cost_classes_num]] + = cost_classes_num; + } + struct_costs_size + = sizeof (struct costs) + sizeof (int) * (cost_classes_num - 1); + /* Zero out our accumulation of the cost of each class for each + allocno. */ + memset (allocno_costs, 0, ira_allocnos_num * struct_costs_size); +#ifdef FORBIDDEN_INC_DEC_CLASSES + memset (in_inc_dec, 0, ira_allocnos_num * sizeof (bool)); +#endif + + /* Scan the instructions and record each time it would save code + to put a certain allocno in a certain class. */ + ira_traverse_loop_tree (true, ira_loop_tree_root, + process_bb_node_for_costs, NULL); + + memcpy (total_costs, allocno_costs, + max_struct_costs_size * ira_allocnos_num); + if (pass == 0) + allocno_pref = allocno_pref_buffer; + + /* Now for each allocno look at how desirable each class is and + find which class is preferred. 
*/ + for (i = max_reg_num () - 1; i >= FIRST_PSEUDO_REGISTER; i--) + { + ira_allocno_t a, parent_a; + int rclass, a_num, parent_a_num; + ira_loop_tree_node_t parent; + int best_cost; + enum reg_class best, alt_class, common_class; +#ifdef FORBIDDEN_INC_DEC_CLASSES + int inc_dec_p = false; +#endif + + if (ira_regno_allocno_map[i] == NULL) + continue; + memset (temp_costs, 0, struct_costs_size); + /* Find cost of all allocnos with the same regno. */ + for (a = ira_regno_allocno_map[i]; + a != NULL; + a = ALLOCNO_NEXT_REGNO_ALLOCNO (a)) + { + a_num = ALLOCNO_NUM (a); + if ((flag_ira_algorithm == IRA_ALGORITHM_REGIONAL + || flag_ira_algorithm == IRA_ALGORITHM_MIXED) + && (parent = ALLOCNO_LOOP_TREE_NODE (a)->parent) != NULL + && (parent_a = parent->regno_allocno_map[i]) != NULL + /* There are no caps yet. */ + && bitmap_bit_p (ALLOCNO_LOOP_TREE_NODE (a)->border_allocnos, + ALLOCNO_NUM (a))) + { + /* Propagate costs to upper levels in the region + tree. */ + parent_a_num = ALLOCNO_NUM (parent_a); + for (k = 0; k < cost_classes_num; k++) + COSTS_OF_ALLOCNO (total_costs, parent_a_num)->cost[k] + += COSTS_OF_ALLOCNO (total_costs, a_num)->cost[k]; + COSTS_OF_ALLOCNO (total_costs, parent_a_num)->mem_cost + += COSTS_OF_ALLOCNO (total_costs, a_num)->mem_cost; + } + for (k = 0; k < cost_classes_num; k++) + temp_costs->cost[k] + += COSTS_OF_ALLOCNO (allocno_costs, a_num)->cost[k]; + temp_costs->mem_cost + += COSTS_OF_ALLOCNO (allocno_costs, a_num)->mem_cost; +#ifdef FORBIDDEN_INC_DEC_CLASSES + if (in_inc_dec[a_num]) + inc_dec_p = true; +#endif + } + best_cost = (1 << (HOST_BITS_PER_INT - 2)) - 1; + best = ALL_REGS; + alt_class = NO_REGS; + /* Find best common class for all allocnos with the same + regno. */ + for (k = 0; k < cost_classes_num; k++) + { + rclass = cost_classes[k]; + /* Ignore classes that are too small for this operand or + invalid for an operand that was auto-incremented. */ + if (! 
contains_reg_of_mode[rclass][PSEUDO_REGNO_MODE (i)] +#ifdef FORBIDDEN_INC_DEC_CLASSES + || (inc_dec_p && forbidden_inc_dec_class[rclass]) +#endif +#ifdef CANNOT_CHANGE_MODE_CLASS + || invalid_mode_change_p (i, (enum reg_class) rclass, + PSEUDO_REGNO_MODE (i)) +#endif + ) + continue; + if (temp_costs->cost[k] < best_cost) + { + best_cost = temp_costs->cost[k]; + best = (enum reg_class) rclass; + } + else if (temp_costs->cost[k] == best_cost) + best = ira_reg_class_union[best][rclass]; + if (pass == flag_expensive_optimizations + && temp_costs->cost[k] < temp_costs->mem_cost + && (reg_class_size[reg_class_subunion[alt_class][rclass]] + > reg_class_size[alt_class])) + alt_class = reg_class_subunion[alt_class][rclass]; + } + if (pass == flag_expensive_optimizations) + { + if (best_cost > temp_costs->mem_cost) + best = alt_class = NO_REGS; + else if (best == alt_class) + alt_class = NO_REGS; + setup_reg_classes (i, best, alt_class); + if (internal_flag_ira_verbose > 2 && ira_dump_file != NULL) + fprintf (ira_dump_file, + " r%d: preferred %s, alternative %s\n", + i, reg_class_names[best], reg_class_names[alt_class]); + } + if (best_cost > temp_costs->mem_cost) + common_class = NO_REGS; + else + /* Make the common class a cover class. Remember all + allocnos with the same regno should have the same cover + class. */ + common_class = ira_class_translate[best]; + for (a = ira_regno_allocno_map[i]; + a != NULL; + a = ALLOCNO_NEXT_REGNO_ALLOCNO (a)) + { + a_num = ALLOCNO_NUM (a); + if (common_class == NO_REGS) + best = NO_REGS; + else + { + /* Finding best class which is subset of the common + class. */ + best_cost = (1 << (HOST_BITS_PER_INT - 2)) - 1; + best = ALL_REGS; + for (k = 0; k < cost_classes_num; k++) + { + rclass = cost_classes[k]; + if (! ira_class_subset_p[rclass][common_class]) + continue; + /* Ignore classes that are too small for this + operand or invalid for an operand that was + auto-incremented. */ + if (! 
contains_reg_of_mode[rclass][PSEUDO_REGNO_MODE (i)] +#ifdef FORBIDDEN_INC_DEC_CLASSES + || (inc_dec_p && forbidden_inc_dec_class[rclass]) +#endif +#ifdef CANNOT_CHANGE_MODE_CLASS + || invalid_mode_change_p (i, (enum reg_class) rclass, + PSEUDO_REGNO_MODE (i)) +#endif + ) + ; + else if (COSTS_OF_ALLOCNO (total_costs, a_num)->cost[k] + < best_cost) + { + best_cost + = COSTS_OF_ALLOCNO (total_costs, a_num)->cost[k]; + best = (enum reg_class) rclass; + } + else if (COSTS_OF_ALLOCNO (total_costs, a_num)->cost[k] + == best_cost) + best = ira_reg_class_union[best][rclass]; + } + } + if (internal_flag_ira_verbose > 2 && ira_dump_file != NULL + && (pass == 0 || allocno_pref[a_num] != best)) + { + fprintf (ira_dump_file, " a%d (r%d,", a_num, i); + if ((bb = ALLOCNO_LOOP_TREE_NODE (a)->bb) != NULL) + fprintf (ira_dump_file, "b%d", bb->index); + else + fprintf (ira_dump_file, "l%d", + ALLOCNO_LOOP_TREE_NODE (a)->loop->num); + fprintf (ira_dump_file, ") best %s, cover %s\n", + reg_class_names[best], + reg_class_names[ira_class_translate[best]]); + } + allocno_pref[a_num] = best; + } + } + + if (internal_flag_ira_verbose > 4 && ira_dump_file) + { + print_costs (ira_dump_file); + fprintf (ira_dump_file,"\n"); + } + } +#ifdef FORBIDDEN_INC_DEC_CLASSES + ira_free (in_inc_dec); +#endif +} + + + +/* Process moves involving hard regs to modify allocno hard register + costs. We can do this only after determining allocno cover class. + If a hard register forms a register class, than moves with the hard + register are already taken into account in class costs for the + allocno. 
*/ +static void +process_bb_node_for_hard_reg_moves (ira_loop_tree_node_t loop_tree_node) +{ + int i, freq, cost, src_regno, dst_regno, hard_regno; + bool to_p; + ira_allocno_t a; + enum reg_class rclass, hard_reg_class; + enum machine_mode mode; + basic_block bb; + rtx insn, set, src, dst; + + bb = loop_tree_node->bb; + if (bb == NULL) + return; + freq = REG_FREQ_FROM_BB (bb); + if (freq == 0) + freq = 1; + FOR_BB_INSNS (bb, insn) + { + if (! INSN_P (insn)) + continue; + set = single_set (insn); + if (set == NULL_RTX) + continue; + dst = SET_DEST (set); + src = SET_SRC (set); + if (! REG_P (dst) || ! REG_P (src)) + continue; + dst_regno = REGNO (dst); + src_regno = REGNO (src); + if (dst_regno >= FIRST_PSEUDO_REGISTER + && src_regno < FIRST_PSEUDO_REGISTER) + { + hard_regno = src_regno; + to_p = true; + a = ira_curr_regno_allocno_map[dst_regno]; + } + else if (src_regno >= FIRST_PSEUDO_REGISTER + && dst_regno < FIRST_PSEUDO_REGISTER) + { + hard_regno = dst_regno; + to_p = false; + a = ira_curr_regno_allocno_map[src_regno]; + } + else + continue; + rclass = ALLOCNO_COVER_CLASS (a); + if (! TEST_HARD_REG_BIT (reg_class_contents[rclass], hard_regno)) + continue; + i = ira_class_hard_reg_index[rclass][hard_regno]; + if (i < 0) + continue; + mode = ALLOCNO_MODE (a); + hard_reg_class = REGNO_REG_CLASS (hard_regno); + cost = (to_p ? 
ira_register_move_cost[mode][hard_reg_class][rclass] + : ira_register_move_cost[mode][rclass][hard_reg_class]) * freq; + ira_allocate_and_set_costs (&ALLOCNO_HARD_REG_COSTS (a), rclass, + ALLOCNO_COVER_CLASS_COST (a)); + ira_allocate_and_set_costs (&ALLOCNO_CONFLICT_HARD_REG_COSTS (a), + rclass, 0); + ALLOCNO_HARD_REG_COSTS (a)[i] -= cost; + ALLOCNO_CONFLICT_HARD_REG_COSTS (a)[i] -= cost; + ALLOCNO_COVER_CLASS_COST (a) = MIN (ALLOCNO_COVER_CLASS_COST (a), + ALLOCNO_HARD_REG_COSTS (a)[i]); + } +} + +/* After we find hard register and memory costs for allocnos, define + its cover class and modify hard register cost because insns moving + allocno to/from hard registers. */ +static void +setup_allocno_cover_class_and_costs (void) +{ + int i, j, n, regno; + int *reg_costs; + enum reg_class cover_class, rclass; + enum machine_mode mode; + ira_allocno_t a; + ira_allocno_iterator ai; + + FOR_EACH_ALLOCNO (a, ai) + { + i = ALLOCNO_NUM (a); + mode = ALLOCNO_MODE (a); + cover_class = ira_class_translate[allocno_pref[i]]; + ira_assert (allocno_pref[i] == NO_REGS || cover_class != NO_REGS); + ALLOCNO_MEMORY_COST (a) = ALLOCNO_UPDATED_MEMORY_COST (a) + = COSTS_OF_ALLOCNO (allocno_costs, i)->mem_cost; + ira_set_allocno_cover_class (a, cover_class); + if (cover_class == NO_REGS) + continue; + ALLOCNO_AVAILABLE_REGS_NUM (a) = ira_available_class_regs[cover_class]; + ALLOCNO_COVER_CLASS_COST (a) + = (COSTS_OF_ALLOCNO (allocno_costs, i) + ->cost[cost_class_nums[allocno_pref[i]]]); + if (optimize && ALLOCNO_COVER_CLASS (a) != allocno_pref[i]) + { + n = ira_class_hard_regs_num[cover_class]; + ALLOCNO_HARD_REG_COSTS (a) + = reg_costs = ira_allocate_cost_vector (cover_class); + for (j = n - 1; j >= 0; j--) + { + regno = ira_class_hard_regs[cover_class][j]; + rclass = REGNO_REG_CLASS (regno); + reg_costs[j] = (COSTS_OF_ALLOCNO (allocno_costs, i) + ->cost[cost_class_nums[rclass]]); + } + } + } + if (optimize) + ira_traverse_loop_tree (true, ira_loop_tree_root, + 
process_bb_node_for_hard_reg_moves, NULL); +} + + + +/* Function called once during compiler work. */ +void +ira_init_costs_once (void) +{ + int i; + + init_cost = NULL; + for (i = 0; i < MAX_RECOG_OPERANDS; i++) + { + op_costs[i] = NULL; + this_op_costs[i] = NULL; + } + temp_costs = NULL; + cost_classes = NULL; +} + +/* Free allocated temporary cost vectors. */ +static void +free_ira_costs (void) +{ + int i; + + if (init_cost != NULL) + free (init_cost); + init_cost = NULL; + for (i = 0; i < MAX_RECOG_OPERANDS; i++) + { + if (op_costs[i] != NULL) + free (op_costs[i]); + if (this_op_costs[i] != NULL) + free (this_op_costs[i]); + op_costs[i] = this_op_costs[i] = NULL; + } + if (temp_costs != NULL) + free (temp_costs); + temp_costs = NULL; + if (cost_classes != NULL) + free (cost_classes); + cost_classes = NULL; +} + +/* This is called each time register related information is + changed. */ +void +ira_init_costs (void) +{ + int i; + + free_ira_costs (); + max_struct_costs_size + = sizeof (struct costs) + sizeof (int) * (ira_important_classes_num - 1); + /* Don't use ira_allocate because vectors live through several IRA calls. */ + init_cost = (struct costs *) xmalloc (max_struct_costs_size); + init_cost->mem_cost = 1000000; + for (i = 0; i < ira_important_classes_num; i++) + init_cost->cost[i] = 1000000; + for (i = 0; i < MAX_RECOG_OPERANDS; i++) + { + op_costs[i] = (struct costs *) xmalloc (max_struct_costs_size); + this_op_costs[i] = (struct costs *) xmalloc (max_struct_costs_size); + } + temp_costs = (struct costs *) xmalloc (max_struct_costs_size); + cost_classes = (enum reg_class *) xmalloc (sizeof (enum reg_class) + * ira_important_classes_num); +} + +/* Function called once at the end of compiler work. */ +void +ira_finish_costs_once (void) +{ + free_ira_costs (); +} + + + +/* Entry function which defines cover class, memory and hard register + costs for each allocno. 
*/ +void +ira_costs (void) +{ + ira_allocno_t a; + ira_allocno_iterator ai; + + allocno_costs = (struct costs *) ira_allocate (max_struct_costs_size + * ira_allocnos_num); + total_costs = (struct costs *) ira_allocate (max_struct_costs_size + * ira_allocnos_num); + allocno_pref_buffer + = (enum reg_class *) ira_allocate (sizeof (enum reg_class) + * ira_allocnos_num); + find_allocno_class_costs (); + setup_allocno_cover_class_and_costs (); + /* Because we could process operands only as subregs, check mode of + the registers themselves too. */ + FOR_EACH_ALLOCNO (a, ai) + if (ira_register_move_cost[ALLOCNO_MODE (a)] == NULL + && have_regs_of_mode[ALLOCNO_MODE (a)]) + ira_init_register_move_cost (ALLOCNO_MODE (a)); + ira_free (allocno_pref_buffer); + ira_free (total_costs); + ira_free (allocno_costs); +} + + + +/* Change hard register costs for allocnos which lives through + function calls. This is called only when we found all intersected + calls during building allocno live ranges. */ +void +ira_tune_allocno_costs_and_cover_classes (void) +{ + int j, n, regno; + int cost, min_cost, *reg_costs; + enum reg_class cover_class, rclass; + enum machine_mode mode; + ira_allocno_t a; + ira_allocno_iterator ai; + + FOR_EACH_ALLOCNO (a, ai) + { + cover_class = ALLOCNO_COVER_CLASS (a); + if (cover_class == NO_REGS) + continue; + mode = ALLOCNO_MODE (a); + n = ira_class_hard_regs_num[cover_class]; + min_cost = INT_MAX; + if (ALLOCNO_CALLS_CROSSED_NUM (a) != 0) + { + ira_allocate_and_set_costs + (&ALLOCNO_HARD_REG_COSTS (a), cover_class, + ALLOCNO_COVER_CLASS_COST (a)); + reg_costs = ALLOCNO_HARD_REG_COSTS (a); + for (j = n - 1; j >= 0; j--) + { + regno = ira_class_hard_regs[cover_class][j]; + rclass = REGNO_REG_CLASS (regno); + cost = 0; + /* ??? If only part is call clobbered. */ + if (! 
ira_hard_reg_not_in_set_p (regno, mode, call_used_reg_set)) + cost += (ALLOCNO_CALL_FREQ (a) + * (ira_memory_move_cost[mode][rclass][0] + + ira_memory_move_cost[mode][rclass][1])); +#ifdef IRA_HARD_REGNO_ADD_COST_MULTIPLIER + cost += ((ira_memory_move_cost[mode][rclass][0] + + ira_memory_move_cost[mode][rclass][1]) + * ALLOCNO_FREQ (a) + * IRA_HARD_REGNO_ADD_COST_MULTIPLIER (regno) / 2); +#endif + reg_costs[j] += cost; + if (min_cost > reg_costs[j]) + min_cost = reg_costs[j]; + } + } + if (min_cost != INT_MAX) + ALLOCNO_COVER_CLASS_COST (a) = min_cost; + } +} diff --git a/gcc/ira-emit.c b/gcc/ira-emit.c new file mode 100644 index 00000000000..d18be8021ab --- /dev/null +++ b/gcc/ira-emit.c @@ -0,0 +1,1019 @@ +/* Integrated Register Allocator. Changing code and generating moves. + Copyright (C) 2006, 2007, 2008 + Free Software Foundation, Inc. + Contributed by Vladimir Makarov <vmakarov@redhat.com>. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. 
*/ + + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "regs.h" +#include "rtl.h" +#include "tm_p.h" +#include "target.h" +#include "flags.h" +#include "obstack.h" +#include "bitmap.h" +#include "hard-reg-set.h" +#include "basic-block.h" +#include "expr.h" +#include "recog.h" +#include "params.h" +#include "timevar.h" +#include "tree-pass.h" +#include "output.h" +#include "reload.h" +#include "errors.h" +#include "df.h" +#include "ira-int.h" + + +typedef struct move *move_t; + +/* The structure represents an allocno move. Both allocnos have the + same origional regno but different allocation. */ +struct move +{ + /* The allocnos involved in the move. */ + ira_allocno_t from, to; + /* The next move in the move sequence. */ + move_t next; + /* Used for finding dependencies. */ + bool visited_p; + /* The size of the following array. */ + int deps_num; + /* Moves on which given move depends on. Dependency can be cyclic. + It means we need a temporary to generates the moves. Sequence + A1->A2, B1->B2 where A1 and B2 are assigned to reg R1 and A2 and + B1 are assigned to reg R2 is an example of the cyclic + dependencies. */ + move_t *deps; + /* First insn generated for the move. */ + rtx insn; +}; + +/* Array of moves (indexed by BB index) which should be put at the + start/end of the corresponding basic blocks. */ +static move_t *at_bb_start, *at_bb_end; + +/* Max regno before renaming some pseudo-registers. For example, the + same pseudo-register can be renamed in a loop if its allocation is + different outside the loop. */ +static int max_regno_before_changing; + +/* Return new move of allocnos TO and FROM. 
*/ +static move_t +create_move (ira_allocno_t to, ira_allocno_t from) +{ + move_t move; + + move = (move_t) ira_allocate (sizeof (struct move)); + move->deps = NULL; + move->deps_num = 0; + move->to = to; + move->from = from; + move->next = NULL; + move->insn = NULL_RTX; + move->visited_p = false; + return move; +} + +/* Free memory for MOVE and its dependencies. */ +static void +free_move (move_t move) +{ + if (move->deps != NULL) + ira_free (move->deps); + ira_free (move); +} + +/* Free memory for list of the moves given by its HEAD. */ +static void +free_move_list (move_t head) +{ + move_t next; + + for (; head != NULL; head = next) + { + next = head->next; + free_move (head); + } +} + +/* Return TRUE if the the move list LIST1 and LIST2 are equal (two + moves are equal if they involve the same allocnos). */ +static bool +eq_move_lists_p (move_t list1, move_t list2) +{ + for (; list1 != NULL && list2 != NULL; + list1 = list1->next, list2 = list2->next) + if (list1->from != list2->from || list1->to != list2->to) + return false; + return list1 == list2; +} + +/* This recursive function changes pseudo-registers in *LOC if it is + necessary. The function returns TRUE if a change was done. */ +static bool +change_regs (rtx *loc) +{ + int i, regno, result = false; + const char *fmt; + enum rtx_code code; + + if (*loc == NULL_RTX) + return false; + code = GET_CODE (*loc); + if (code == REG) + { + regno = REGNO (*loc); + if (regno < FIRST_PSEUDO_REGISTER) + return false; + if (regno >= max_regno_before_changing) + /* It is a shared register which was changed already. 
*/ + return false; + if (ira_curr_regno_allocno_map[regno] == NULL) + return false; + *loc = ALLOCNO_REG (ira_curr_regno_allocno_map[regno]); + return true; + } + + fmt = GET_RTX_FORMAT (code); + for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--) + { + if (fmt[i] == 'e') + result = change_regs (&XEXP (*loc, i)) || result; + else if (fmt[i] == 'E') + { + int j; + + for (j = XVECLEN (*loc, i) - 1; j >= 0; j--) + result = change_regs (&XVECEXP (*loc, i, j)) || result; + } + } + return result; +} + +/* Attach MOVE to the edge E. The move is attached to the head of the + list if HEAD_P is TRUE. */ +static void +add_to_edge_list (edge e, move_t move, bool head_p) +{ + move_t last; + + if (head_p || e->aux == NULL) + { + move->next = (move_t) e->aux; + e->aux = move; + } + else + { + for (last = (move_t) e->aux; last->next != NULL; last = last->next) + ; + last->next = move; + move->next = NULL; + } +} + +/* Create and return new pseudo-register with the same attributes as + ORIGINAL_REG. */ +static rtx +create_new_reg (rtx original_reg) +{ + rtx new_reg; + + new_reg = gen_reg_rtx (GET_MODE (original_reg)); + ORIGINAL_REGNO (new_reg) = ORIGINAL_REGNO (original_reg); + REG_USERVAR_P (new_reg) = REG_USERVAR_P (original_reg); + REG_POINTER (new_reg) = REG_POINTER (original_reg); + REG_ATTRS (new_reg) = REG_ATTRS (original_reg); + if (internal_flag_ira_verbose > 3 && ira_dump_file != NULL) + fprintf (ira_dump_file, " Creating newreg=%i from oldreg=%i\n", + REGNO (new_reg), REGNO (original_reg)); + return new_reg; +} + +/* Return TRUE if loop given by SUBNODE inside the loop given by + NODE. */ +static bool +subloop_tree_node_p (ira_loop_tree_node_t subnode, ira_loop_tree_node_t node) +{ + for (; subnode != NULL; subnode = subnode->parent) + if (subnode == node) + return true; + return false; +} + +/* Set up member `reg' to REG for allocnos which has the same regno as + ALLOCNO and which are inside the loop corresponding to ALLOCNO. 
*/ +static void +set_allocno_reg (ira_allocno_t allocno, rtx reg) +{ + int regno; + ira_allocno_t a; + ira_loop_tree_node_t node; + + node = ALLOCNO_LOOP_TREE_NODE (allocno); + for (a = ira_regno_allocno_map[ALLOCNO_REGNO (allocno)]; + a != NULL; + a = ALLOCNO_NEXT_REGNO_ALLOCNO (a)) + if (subloop_tree_node_p (ALLOCNO_LOOP_TREE_NODE (a), node)) + ALLOCNO_REG (a) = reg; + for (a = ALLOCNO_CAP (allocno); a != NULL; a = ALLOCNO_CAP (a)) + ALLOCNO_REG (a) = reg; + regno = ALLOCNO_REGNO (allocno); + for (a = allocno;;) + { + if (a == NULL || (a = ALLOCNO_CAP (a)) == NULL) + { + node = node->parent; + if (node == NULL) + break; + a = node->regno_allocno_map[regno]; + } + if (a == NULL) + continue; + if (ALLOCNO_CHILD_RENAMED_P (a)) + break; + ALLOCNO_CHILD_RENAMED_P (a) = true; + } +} + +/* Return TRUE if move of SRC_ALLOCNO to DEST_ALLOCNO does not change + value of the destination. One possible reason for this is the + situation when SRC_ALLOCNO is not modified in the corresponding + loop. */ +static bool +not_modified_p (ira_allocno_t src_allocno, ira_allocno_t dest_allocno) +{ + int regno, orig_regno; + ira_allocno_t a; + ira_loop_tree_node_t node; + + ira_assert (ALLOCNO_CAP_MEMBER (src_allocno) == NULL + && ALLOCNO_CAP_MEMBER (dest_allocno) == NULL); + orig_regno = ALLOCNO_REGNO (src_allocno); + regno = REGNO (ALLOCNO_REG (dest_allocno)); + for (node = ALLOCNO_LOOP_TREE_NODE (src_allocno); + node != NULL; + node = node->parent) + if ((a = node->regno_allocno_map[orig_regno]) == NULL) + break; + else if (REGNO (ALLOCNO_REG (a)) == (unsigned) regno) + return true; + else if (bitmap_bit_p (node->modified_regnos, orig_regno)) + return false; + return node != NULL; +} + +/* Generate and attach moves to the edge E. This looks at the final + regnos of allocnos living on the edge with the same original regno + to figure out when moves should be generated. 
*/ +static void +generate_edge_moves (edge e) +{ + ira_loop_tree_node_t src_loop_node, dest_loop_node; + unsigned int regno; + bitmap_iterator bi; + ira_allocno_t src_allocno, dest_allocno, *src_map, *dest_map; + move_t move; + + src_loop_node = IRA_BB_NODE (e->src)->parent; + dest_loop_node = IRA_BB_NODE (e->dest)->parent; + e->aux = NULL; + if (src_loop_node == dest_loop_node) + return; + src_map = src_loop_node->regno_allocno_map; + dest_map = dest_loop_node->regno_allocno_map; + EXECUTE_IF_SET_IN_REG_SET (DF_LR_IN (e->dest), + FIRST_PSEUDO_REGISTER, regno, bi) + if (bitmap_bit_p (DF_LR_OUT (e->src), regno)) + { + src_allocno = src_map[regno]; + dest_allocno = dest_map[regno]; + if (REGNO (ALLOCNO_REG (src_allocno)) + == REGNO (ALLOCNO_REG (dest_allocno))) + continue; + /* Actually it is not a optimization we need this code because + the memory (remember about equivalent memory) might be ROM + (or placed in read only section). */ + if (ALLOCNO_HARD_REGNO (dest_allocno) < 0 + && ALLOCNO_HARD_REGNO (src_allocno) >= 0 + && not_modified_p (src_allocno, dest_allocno)) + { + ALLOCNO_MEM_OPTIMIZED_DEST (src_allocno) = dest_allocno; + ALLOCNO_MEM_OPTIMIZED_DEST_P (dest_allocno) = true; + if (internal_flag_ira_verbose > 3 && ira_dump_file != NULL) + fprintf (ira_dump_file, " Remove r%d:a%d->a%d(mem)\n", + regno, ALLOCNO_NUM (src_allocno), + ALLOCNO_NUM (dest_allocno)); + continue; + } + move = create_move (dest_allocno, src_allocno); + add_to_edge_list (e, move, true); + } +} + +/* Bitmap of allocnos local for the current loop. */ +static bitmap local_allocno_bitmap; + +/* This bitmap is used to find that we need to generate and to use a + new pseudo-register when processing allocnos with the same original + regno. */ +static bitmap used_regno_bitmap; + +/* This bitmap contains regnos of allocnos which were renamed locally + because the allocnos correspond to disjoint live ranges in loops + with a common parent. 
*/ +static bitmap renamed_regno_bitmap; + +/* Change (if necessary) pseudo-registers inside loop given by loop + tree node NODE. */ +static void +change_loop (ira_loop_tree_node_t node) +{ + bitmap_iterator bi; + unsigned int i; + int regno; + bool used_p; + ira_allocno_t allocno, parent_allocno, *map; + rtx insn, original_reg; + enum reg_class cover_class; + ira_loop_tree_node_t parent; + + if (node != ira_loop_tree_root) + { + + if (node->bb != NULL) + { + FOR_BB_INSNS (node->bb, insn) + if (INSN_P (insn) && change_regs (&insn)) + { + df_insn_rescan (insn); + df_notes_rescan (insn); + } + return; + } + + if (internal_flag_ira_verbose > 3 && ira_dump_file != NULL) + fprintf (ira_dump_file, + " Changing RTL for loop %d (header bb%d)\n", + node->loop->num, node->loop->header->index); + + parent = ira_curr_loop_tree_node->parent; + map = parent->regno_allocno_map; + EXECUTE_IF_SET_IN_REG_SET (ira_curr_loop_tree_node->border_allocnos, + 0, i, bi) + { + allocno = ira_allocnos[i]; + regno = ALLOCNO_REGNO (allocno); + cover_class = ALLOCNO_COVER_CLASS (allocno); + parent_allocno = map[regno]; + ira_assert (regno < ira_reg_equiv_len); + /* We generate the same hard register move because the + reload pass can put an allocno into memory in this case + we will have live range splitting. If it does not happen + such the same hard register moves will be removed. The + worst case when the both allocnos are put into memory by + the reload is very rare. */ + if (parent_allocno != NULL + && (ALLOCNO_HARD_REGNO (allocno) + == ALLOCNO_HARD_REGNO (parent_allocno)) + && (ALLOCNO_HARD_REGNO (allocno) < 0 + || (parent->reg_pressure[cover_class] + 1 + <= ira_available_class_regs[cover_class]) + || TEST_HARD_REG_BIT (ira_prohibited_mode_move_regs + [ALLOCNO_MODE (allocno)], + ALLOCNO_HARD_REGNO (allocno)) + /* don't create copies because reload can spill an + allocno set by copy although the allocno will not + get memory slot. 
*/ + || ira_reg_equiv_invariant_p[regno] + || ira_reg_equiv_const[regno] != NULL_RTX)) + continue; + original_reg = ALLOCNO_REG (allocno); + if (parent_allocno == NULL + || REGNO (ALLOCNO_REG (parent_allocno)) == REGNO (original_reg)) + { + if (internal_flag_ira_verbose > 3 && ira_dump_file) + fprintf (ira_dump_file, " %i vs parent %i:", + ALLOCNO_HARD_REGNO (allocno), + ALLOCNO_HARD_REGNO (parent_allocno)); + set_allocno_reg (allocno, create_new_reg (original_reg)); + } + } + } + /* Rename locals: Local allocnos with same regno in different loops + might get the different hard register. So we need to change + ALLOCNO_REG. */ + bitmap_and_compl (local_allocno_bitmap, + ira_curr_loop_tree_node->mentioned_allocnos, + ira_curr_loop_tree_node->border_allocnos); + EXECUTE_IF_SET_IN_REG_SET (local_allocno_bitmap, 0, i, bi) + { + allocno = ira_allocnos[i]; + regno = ALLOCNO_REGNO (allocno); + if (ALLOCNO_CAP_MEMBER (allocno) != NULL) + continue; + used_p = bitmap_bit_p (used_regno_bitmap, regno); + bitmap_set_bit (used_regno_bitmap, regno); + ALLOCNO_SOMEWHERE_RENAMED_P (allocno) = true; + if (! used_p) + continue; + bitmap_set_bit (renamed_regno_bitmap, regno); + set_allocno_reg (allocno, create_new_reg (ALLOCNO_REG (allocno))); + } +} + +/* Process to set up flag somewhere_renamed_p. */ +static void +set_allocno_somewhere_renamed_p (void) +{ + unsigned int regno; + ira_allocno_t allocno; + ira_allocno_iterator ai; + + FOR_EACH_ALLOCNO (allocno, ai) + { + regno = ALLOCNO_REGNO (allocno); + if (bitmap_bit_p (renamed_regno_bitmap, regno) + && REGNO (ALLOCNO_REG (allocno)) == regno) + ALLOCNO_SOMEWHERE_RENAMED_P (allocno) = true; + } +} + +/* Return TRUE if move lists on all edges given in vector VEC are + equal. */ +static bool +eq_edge_move_lists_p (VEC(edge,gc) *vec) +{ + move_t list; + int i; + + list = (move_t) EDGE_I (vec, 0)->aux; + for (i = EDGE_COUNT (vec) - 1; i > 0; i--) + if (! 
eq_move_lists_p (list, (move_t) EDGE_I (vec, i)->aux)) + return false; + return true; +} + +/* Look at all entry edges (if START_P) or exit edges of basic block + BB and put move lists at the BB start or end if it is possible. In + other words, this decreases code duplication of allocno moves. */ +static void +unify_moves (basic_block bb, bool start_p) +{ + int i; + edge e; + move_t list; + VEC(edge,gc) *vec; + + vec = (start_p ? bb->preds : bb->succs); + if (EDGE_COUNT (vec) == 0 || ! eq_edge_move_lists_p (vec)) + return; + e = EDGE_I (vec, 0); + list = (move_t) e->aux; + if (! start_p && control_flow_insn_p (BB_END (bb))) + return; + e->aux = NULL; + for (i = EDGE_COUNT (vec) - 1; i > 0; i--) + { + e = EDGE_I (vec, i); + free_move_list ((move_t) e->aux); + e->aux = NULL; + } + if (start_p) + at_bb_start[bb->index] = list; + else + at_bb_end[bb->index] = list; +} + +/* Last move (in move sequence being processed) setting up the + corresponding hard register. */ +static move_t hard_regno_last_set[FIRST_PSEUDO_REGISTER]; + +/* If the element value is equal to CURR_TICK then the corresponding + element in `hard_regno_last_set' is defined and correct. */ +static int hard_regno_last_set_check[FIRST_PSEUDO_REGISTER]; + +/* Last move (in move sequence being processed) setting up the + corresponding allocno. */ +static move_t *allocno_last_set; + +/* If the element value is equal to CURR_TICK then the corresponding + element in . `allocno_last_set' is defined and correct. */ +static int *allocno_last_set_check; + +/* Definition of vector of moves. */ +DEF_VEC_P(move_t); +DEF_VEC_ALLOC_P(move_t, heap); + +/* This vec contains moves sorted topologically (depth-first) on their + dependency graph. */ +static VEC(move_t,heap) *move_vec; + +/* The variable value is used to check correctness of values of + elements of arrays `hard_regno_last_set' and + `allocno_last_set_check'. 
*/ +static int curr_tick; + +/* This recursive function traverses dependencies of MOVE and produces + topological sorting (in depth-first order). */ +static void +traverse_moves (move_t move) +{ + int i; + + if (move->visited_p) + return; + move->visited_p = true; + for (i = move->deps_num - 1; i >= 0; i--) + traverse_moves (move->deps[i]); + VEC_safe_push (move_t, heap, move_vec, move); +} + +/* Remove unnecessary moves in the LIST, makes topological sorting, + and removes cycles on hard reg dependencies by introducing new + allocnos assigned to memory and additional moves. It returns the + result move list. */ +static move_t +modify_move_list (move_t list) +{ + int i, n, nregs, hard_regno; + ira_allocno_t to, from, new_allocno; + move_t move, new_move, set_move, first, last; + + if (list == NULL) + return NULL; + /* Creat move deps. */ + curr_tick++; + for (move = list; move != NULL; move = move->next) + { + to = move->to; + if ((hard_regno = ALLOCNO_HARD_REGNO (to)) < 0) + continue; + nregs = hard_regno_nregs[hard_regno][ALLOCNO_MODE (to)]; + for (i = 0; i < nregs; i++) + { + hard_regno_last_set[hard_regno + i] = move; + hard_regno_last_set_check[hard_regno + i] = curr_tick; + } + } + for (move = list; move != NULL; move = move->next) + { + from = move->from; + to = move->to; + if ((hard_regno = ALLOCNO_HARD_REGNO (from)) >= 0) + { + nregs = hard_regno_nregs[hard_regno][ALLOCNO_MODE (from)]; + for (n = i = 0; i < nregs; i++) + if (hard_regno_last_set_check[hard_regno + i] == curr_tick + && (ALLOCNO_REGNO (hard_regno_last_set[hard_regno + i]->to) + != ALLOCNO_REGNO (from))) + n++; + move->deps = (move_t *) ira_allocate (n * sizeof (move_t)); + for (n = i = 0; i < nregs; i++) + if (hard_regno_last_set_check[hard_regno + i] == curr_tick + && (ALLOCNO_REGNO (hard_regno_last_set[hard_regno + i]->to) + != ALLOCNO_REGNO (from))) + move->deps[n++] = hard_regno_last_set[hard_regno + i]; + move->deps_num = n; + } + } + /* Toplogical sorting: */ + VEC_truncate (move_t, 
move_vec, 0); + for (move = list; move != NULL; move = move->next) + traverse_moves (move); + last = NULL; + for (i = (int) VEC_length (move_t, move_vec) - 1; i >= 0; i--) + { + move = VEC_index (move_t, move_vec, i); + move->next = NULL; + if (last != NULL) + last->next = move; + last = move; + } + first = VEC_last (move_t, move_vec); + /* Removing cycles: */ + curr_tick++; + VEC_truncate (move_t, move_vec, 0); + for (move = first; move != NULL; move = move->next) + { + from = move->from; + to = move->to; + if ((hard_regno = ALLOCNO_HARD_REGNO (from)) >= 0) + { + nregs = hard_regno_nregs[hard_regno][ALLOCNO_MODE (from)]; + for (i = 0; i < nregs; i++) + if (hard_regno_last_set_check[hard_regno + i] == curr_tick + && ALLOCNO_HARD_REGNO + (hard_regno_last_set[hard_regno + i]->to) >= 0) + { + set_move = hard_regno_last_set[hard_regno + i]; + /* It does not matter what loop_tree_node (of TO or + FROM) to use for the new allocno because of + subsequent IRA internal representation + flattening. */ + new_allocno + = ira_create_allocno (ALLOCNO_REGNO (set_move->to), false, + ALLOCNO_LOOP_TREE_NODE (set_move->to)); + ALLOCNO_MODE (new_allocno) = ALLOCNO_MODE (set_move->to); + ira_set_allocno_cover_class + (new_allocno, ALLOCNO_COVER_CLASS (set_move->to)); + ALLOCNO_ASSIGNED_P (new_allocno) = true; + ALLOCNO_HARD_REGNO (new_allocno) = -1; + ALLOCNO_REG (new_allocno) + = create_new_reg (ALLOCNO_REG (set_move->to)); + ALLOCNO_CONFLICT_ID (new_allocno) = ALLOCNO_NUM (new_allocno); + /* Make it possibly conflicting with all earlier + created allocnos. Cases where temporary allocnos + created to remove the cycles are quite rare. 
*/ + ALLOCNO_MIN (new_allocno) = 0; + ALLOCNO_MAX (new_allocno) = ira_allocnos_num - 1; + new_move = create_move (set_move->to, new_allocno); + set_move->to = new_allocno; + VEC_safe_push (move_t, heap, move_vec, new_move); + ira_move_loops_num++; + if (internal_flag_ira_verbose > 2 && ira_dump_file != NULL) + fprintf (ira_dump_file, + " Creating temporary allocno a%dr%d\n", + ALLOCNO_NUM (new_allocno), + REGNO (ALLOCNO_REG (new_allocno))); + } + } + if ((hard_regno = ALLOCNO_HARD_REGNO (to)) < 0) + continue; + nregs = hard_regno_nregs[hard_regno][ALLOCNO_MODE (to)]; + for (i = 0; i < nregs; i++) + { + hard_regno_last_set[hard_regno + i] = move; + hard_regno_last_set_check[hard_regno + i] = curr_tick; + } + } + for (i = (int) VEC_length (move_t, move_vec) - 1; i >= 0; i--) + { + move = VEC_index (move_t, move_vec, i); + move->next = NULL; + last->next = move; + last = move; + } + return first; +} + +/* Generate RTX move insns from the move list LIST. This updates + allocation cost using move execution frequency FREQ. */ +static rtx +emit_move_list (move_t list, int freq) +{ + int cost; + rtx result, insn; + enum machine_mode mode; + enum reg_class cover_class; + + start_sequence (); + for (; list != NULL; list = list->next) + { + start_sequence (); + emit_move_insn (ALLOCNO_REG (list->to), ALLOCNO_REG (list->from)); + list->insn = get_insns (); + end_sequence (); + /* The reload needs to have set up insn codes. If the reload + sets up insn codes by itself, it may fail because insns will + have hard registers instead of pseudos and there may be no + machine insn with given hard registers. 
*/ + for (insn = list->insn; insn != NULL_RTX; insn = NEXT_INSN (insn)) + recog_memoized (insn); + emit_insn (list->insn); + mode = ALLOCNO_MODE (list->to); + cover_class = ALLOCNO_COVER_CLASS (list->to); + cost = 0; + if (ALLOCNO_HARD_REGNO (list->to) < 0) + { + if (ALLOCNO_HARD_REGNO (list->from) >= 0) + { + cost = ira_memory_move_cost[mode][cover_class][0] * freq; + ira_store_cost += cost; + } + } + else if (ALLOCNO_HARD_REGNO (list->from) < 0) + { + if (ALLOCNO_HARD_REGNO (list->to) >= 0) + { + cost = ira_memory_move_cost[mode][cover_class][0] * freq; + ira_load_cost += cost; + } + } + else + { + cost = ira_register_move_cost[mode][cover_class][cover_class] * freq; + ira_shuffle_cost += cost; + } + ira_overall_cost += cost; + } + result = get_insns (); + end_sequence (); + return result; +} + +/* Generate RTX move insns from move lists attached to basic blocks + and edges. */ +static void +emit_moves (void) +{ + basic_block bb; + edge_iterator ei; + edge e; + rtx insns, tmp; + + FOR_EACH_BB (bb) + { + if (at_bb_start[bb->index] != NULL) + { + at_bb_start[bb->index] = modify_move_list (at_bb_start[bb->index]); + insns = emit_move_list (at_bb_start[bb->index], + REG_FREQ_FROM_BB (bb)); + tmp = BB_HEAD (bb); + if (LABEL_P (tmp)) + tmp = NEXT_INSN (tmp); + if (NOTE_INSN_BASIC_BLOCK_P (tmp)) + tmp = NEXT_INSN (tmp); + if (tmp == BB_HEAD (bb)) + emit_insn_before (insns, tmp); + else if (tmp != NULL_RTX) + emit_insn_after (insns, PREV_INSN (tmp)); + else + emit_insn_after (insns, get_last_insn ()); + } + + if (at_bb_end[bb->index] != NULL) + { + at_bb_end[bb->index] = modify_move_list (at_bb_end[bb->index]); + insns = emit_move_list (at_bb_end[bb->index], REG_FREQ_FROM_BB (bb)); + ira_assert (! control_flow_insn_p (BB_END (bb))); + emit_insn_after (insns, BB_END (bb)); + } + + FOR_EACH_EDGE (e, ei, bb->succs) + { + if (e->aux == NULL) + continue; + ira_assert ((e->flags & EDGE_ABNORMAL) == 0 + || ! 
EDGE_CRITICAL_P (e)); + e->aux = modify_move_list ((move_t) e->aux); + insert_insn_on_edge + (emit_move_list ((move_t) e->aux, + REG_FREQ_FROM_EDGE_FREQ (EDGE_FREQUENCY (e))), + e); + if (e->src->next_bb != e->dest) + ira_additional_jumps_num++; + } + } +} + +/* Update costs of A and corresponding allocnos on upper levels on the + loop tree from reading (if READ_P) or writing A on an execution + path with FREQ. */ +static void +update_costs (ira_allocno_t a, bool read_p, int freq) +{ + ira_loop_tree_node_t parent; + + for (;;) + { + ALLOCNO_NREFS (a)++; + ALLOCNO_FREQ (a) += freq; + ALLOCNO_MEMORY_COST (a) + += (ira_memory_move_cost[ALLOCNO_MODE (a)][ALLOCNO_COVER_CLASS (a)] + [read_p ? 1 : 0] * freq); + if (ALLOCNO_CAP (a) != NULL) + a = ALLOCNO_CAP (a); + else if ((parent = ALLOCNO_LOOP_TREE_NODE (a)->parent) == NULL + || (a = parent->regno_allocno_map[ALLOCNO_REGNO (a)]) == NULL) + break; + } +} + +/* Process moves from LIST with execution FREQ to add ranges, copies, + and modify costs for allocnos involved in the moves. All regnos + living through the list is in LIVE_THROUGH, and the loop tree node + used to find corresponding allocnos is NODE. */ +static void +add_range_and_copies_from_move_list (move_t list, ira_loop_tree_node_t node, + bitmap live_through, int freq) +{ + int start, n; + unsigned int regno; + move_t move; + ira_allocno_t to, from, a; + ira_copy_t cp; + allocno_live_range_t r; + bitmap_iterator bi; + HARD_REG_SET hard_regs_live; + + if (list == NULL) + return; + n = 0; + EXECUTE_IF_SET_IN_BITMAP (live_through, FIRST_PSEUDO_REGISTER, regno, bi) + n++; + REG_SET_TO_HARD_REG_SET (hard_regs_live, live_through); + /* This is a trick to guarantee that new ranges is not merged with + the old ones. 
*/ + ira_max_point++; + start = ira_max_point; + for (move = list; move != NULL; move = move->next) + { + from = move->from; + to = move->to; + if (ALLOCNO_CONFLICT_ALLOCNO_ARRAY (to) == NULL) + { + if (internal_flag_ira_verbose > 2 && ira_dump_file != NULL) + fprintf (ira_dump_file, " Allocate conflicts for a%dr%d\n", + ALLOCNO_NUM (to), REGNO (ALLOCNO_REG (to))); + ira_allocate_allocno_conflicts (to, n); + } + bitmap_clear_bit (live_through, ALLOCNO_REGNO (from)); + bitmap_clear_bit (live_through, ALLOCNO_REGNO (to)); + IOR_HARD_REG_SET (ALLOCNO_CONFLICT_HARD_REGS (from), hard_regs_live); + IOR_HARD_REG_SET (ALLOCNO_CONFLICT_HARD_REGS (to), hard_regs_live); + IOR_HARD_REG_SET (ALLOCNO_TOTAL_CONFLICT_HARD_REGS (from), + hard_regs_live); + IOR_HARD_REG_SET (ALLOCNO_TOTAL_CONFLICT_HARD_REGS (to), hard_regs_live); + update_costs (from, true, freq); + update_costs (to, false, freq); + cp = ira_add_allocno_copy (from, to, freq, move->insn, NULL); + if (internal_flag_ira_verbose > 2 && ira_dump_file != NULL) + fprintf (ira_dump_file, " Adding cp%d:a%dr%d-a%dr%d\n", + cp->num, ALLOCNO_NUM (cp->first), + REGNO (ALLOCNO_REG (cp->first)), ALLOCNO_NUM (cp->second), + REGNO (ALLOCNO_REG (cp->second))); + r = ALLOCNO_LIVE_RANGES (from); + if (r == NULL || r->finish >= 0) + { + ALLOCNO_LIVE_RANGES (from) + = ira_create_allocno_live_range (from, start, ira_max_point, r); + if (internal_flag_ira_verbose > 2 && ira_dump_file != NULL) + fprintf (ira_dump_file, + " Adding range [%d..%d] to allocno a%dr%d\n", + start, ira_max_point, ALLOCNO_NUM (from), + REGNO (ALLOCNO_REG (from))); + } + else + r->finish = ira_max_point; + ira_max_point++; + ALLOCNO_LIVE_RANGES (to) + = ira_create_allocno_live_range (to, ira_max_point, -1, + ALLOCNO_LIVE_RANGES (to)); + ira_max_point++; + } + for (move = list; move != NULL; move = move->next) + { + r = ALLOCNO_LIVE_RANGES (move->to); + if (r->finish < 0) + { + r->finish = ira_max_point - 1; + if (internal_flag_ira_verbose > 2 && ira_dump_file != 
NULL) + fprintf (ira_dump_file, + " Adding range [%d..%d] to allocno a%dr%d\n", + r->start, r->finish, ALLOCNO_NUM (move->to), + REGNO (ALLOCNO_REG (move->to))); + } + } + EXECUTE_IF_SET_IN_BITMAP (live_through, FIRST_PSEUDO_REGISTER, regno, bi) + { + a = node->regno_allocno_map[regno]; + if (ALLOCNO_MEM_OPTIMIZED_DEST (a) == NULL) + { + ALLOCNO_LIVE_RANGES (a) + = ira_create_allocno_live_range (a, start, ira_max_point - 1, + ALLOCNO_LIVE_RANGES (a)); + if (internal_flag_ira_verbose > 2 && ira_dump_file != NULL) + fprintf + (ira_dump_file, + " Adding range [%d..%d] to live through allocno a%dr%d\n", + start, ira_max_point - 1, ALLOCNO_NUM (a), + REGNO (ALLOCNO_REG (a))); + } + } +} + +/* Process all move list to add ranges, conflicts, copies, and modify + costs for allocnos involved in the moves. */ +static void +add_ranges_and_copies (void) +{ + basic_block bb; + edge_iterator ei; + edge e; + ira_loop_tree_node_t node; + bitmap live_through; + + live_through = ira_allocate_bitmap (); + FOR_EACH_BB (bb) + { + /* It does not matter what loop_tree_node (of source or + destination block) to use for searching allocnos by their + regnos because of subsequent IR flattening. */ + node = IRA_BB_NODE (bb)->parent; + bitmap_copy (live_through, DF_LR_IN (bb)); + add_range_and_copies_from_move_list + (at_bb_start[bb->index], node, live_through, REG_FREQ_FROM_BB (bb)); + bitmap_copy (live_through, DF_LR_OUT (bb)); + add_range_and_copies_from_move_list + (at_bb_end[bb->index], node, live_through, REG_FREQ_FROM_BB (bb)); + FOR_EACH_EDGE (e, ei, bb->succs) + { + bitmap_and (live_through, DF_LR_IN (e->dest), DF_LR_OUT (bb)); + add_range_and_copies_from_move_list + ((move_t) e->aux, node, live_through, + REG_FREQ_FROM_EDGE_FREQ (EDGE_FREQUENCY (e))); + } + } + ira_free_bitmap (live_through); +} + +/* The entry function changes code and generates shuffling allocnos on + region borders for the regional (LOOPS_P is TRUE in this case) + register allocation. 
*/ +void +ira_emit (bool loops_p) +{ + basic_block bb; + edge_iterator ei; + edge e; + ira_allocno_t a; + ira_allocno_iterator ai; + + FOR_EACH_ALLOCNO (a, ai) + ALLOCNO_REG (a) = regno_reg_rtx[ALLOCNO_REGNO (a)]; + if (! loops_p) + return; + at_bb_start = (move_t *) ira_allocate (sizeof (move_t) * last_basic_block); + memset (at_bb_start, 0, sizeof (move_t) * last_basic_block); + at_bb_end = (move_t *) ira_allocate (sizeof (move_t) * last_basic_block); + memset (at_bb_end, 0, sizeof (move_t) * last_basic_block); + local_allocno_bitmap = ira_allocate_bitmap (); + used_regno_bitmap = ira_allocate_bitmap (); + renamed_regno_bitmap = ira_allocate_bitmap (); + max_regno_before_changing = max_reg_num (); + ira_traverse_loop_tree (true, ira_loop_tree_root, change_loop, NULL); + set_allocno_somewhere_renamed_p (); + ira_free_bitmap (used_regno_bitmap); + ira_free_bitmap (renamed_regno_bitmap); + ira_free_bitmap (local_allocno_bitmap); + FOR_EACH_BB (bb) + { + at_bb_start[bb->index] = NULL; + at_bb_end[bb->index] = NULL; + FOR_EACH_EDGE (e, ei, bb->succs) + if (e->dest != EXIT_BLOCK_PTR) + generate_edge_moves (e); + } + allocno_last_set + = (move_t *) ira_allocate (sizeof (move_t) * max_reg_num ()); + allocno_last_set_check + = (int *) ira_allocate (sizeof (int) * max_reg_num ()); + memset (allocno_last_set_check, 0, sizeof (int) * max_reg_num ()); + memset (hard_regno_last_set_check, 0, sizeof (hard_regno_last_set_check)); + curr_tick = 0; + FOR_EACH_BB (bb) + unify_moves (bb, true); + FOR_EACH_BB (bb) + unify_moves (bb, false); + move_vec = VEC_alloc (move_t, heap, ira_allocnos_num); + emit_moves (); + add_ranges_and_copies (); + /* Clean up: */ + FOR_EACH_BB (bb) + { + free_move_list (at_bb_start[bb->index]); + free_move_list (at_bb_end[bb->index]); + FOR_EACH_EDGE (e, ei, bb->succs) + { + free_move_list ((move_t) e->aux); + e->aux = NULL; + } + } + VEC_free (move_t, heap, move_vec); + ira_free (allocno_last_set_check); + ira_free (allocno_last_set); + 
commit_edge_insertions (); + ira_free (at_bb_end); + ira_free (at_bb_start); +} diff --git a/gcc/ira-int.h b/gcc/ira-int.h new file mode 100644 index 00000000000..f656cf16737 --- /dev/null +++ b/gcc/ira-int.h @@ -0,0 +1,1200 @@ +/* Integrated Register Allocator (IRA) intercommunication header file. + Copyright (C) 2006, 2007, 2008 + Free Software Foundation, Inc. + Contributed by Vladimir Makarov <vmakarov@redhat.com>. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +#include "cfgloop.h" +#include "ira.h" +#include "alloc-pool.h" + +/* To provide consistency in naming, all IRA external variables, + functions, common typedefs start with prefix ira_. */ + +#ifdef ENABLE_CHECKING +#define ENABLE_IRA_CHECKING +#endif + +#ifdef ENABLE_IRA_CHECKING +#define ira_assert(c) gcc_assert (c) +#else +#define ira_assert(c) +#endif + +/* Compute register frequency from edge frequency FREQ. It is + analogous to REG_FREQ_FROM_BB. When optimizing for size, or + profile driven feedback is available and the function is never + executed, frequency is always equivalent. Otherwise rescale the + edge frequency. */ +#define REG_FREQ_FROM_EDGE_FREQ(freq) \ + (optimize_size || (flag_branch_probabilities && !ENTRY_BLOCK_PTR->count) \ + ? REG_FREQ_MAX : (freq * REG_FREQ_MAX / BB_FREQ_MAX) \ + ? (freq * REG_FREQ_MAX / BB_FREQ_MAX) : 1) + +/* All natural loops. 
*/ +extern struct loops ira_loops; + +/* A modified value of flag `-fira-verbose' used internally. */ +extern int internal_flag_ira_verbose; + +/* Dump file of the allocator if it is not NULL. */ +extern FILE *ira_dump_file; + +/* Typedefs for pointers to allocno live range, allocno, and copy of + allocnos. */ +typedef struct ira_allocno_live_range *allocno_live_range_t; +typedef struct ira_allocno *ira_allocno_t; +typedef struct ira_allocno_copy *ira_copy_t; + +/* Definition of vector of allocnos and copies. */ +DEF_VEC_P(ira_allocno_t); +DEF_VEC_ALLOC_P(ira_allocno_t, heap); +DEF_VEC_P(ira_copy_t); +DEF_VEC_ALLOC_P(ira_copy_t, heap); + +/* Typedef for pointer to the subsequent structure. */ +typedef struct ira_loop_tree_node *ira_loop_tree_node_t; + +/* In general case, IRA is a regional allocator. The regions are + nested and form a tree. Currently regions are natural loops. The + following structure describes loop tree node (representing basic + block or loop). We need such tree because the loop tree from + cfgloop.h is not convenient for the optimization: basic blocks are + not a part of the tree from cfgloop.h. We also use the nodes for + storing additional information about basic blocks/loops for the + register allocation purposes. */ +struct ira_loop_tree_node +{ + /* The node represents basic block if children == NULL. */ + basic_block bb; /* NULL for loop. */ + struct loop *loop; /* NULL for BB. */ + /* The next (loop) node of with the same parent. SUBLOOP_NEXT is + always NULL for BBs. */ + ira_loop_tree_node_t subloop_next, next; + /* The first (loop) node immediately inside the node. SUBLOOPS is + always NULL for BBs. */ + ira_loop_tree_node_t subloops, children; + /* The node immediately containing given node. */ + ira_loop_tree_node_t parent; + + /* Loop level in range [0, ira_loop_tree_height). */ + int level; + + /* All the following members are defined only for nodes representing + loops. 
*/ + + /* Allocnos in the loop corresponding to their regnos. If it is + NULL the loop does not form a separate register allocation region + (e.g. because it has abnormal enter/exit edges and we can not put + code for register shuffling on the edges if a different + allocation is used for a pseudo-register on different sides of + the edges). Caps are not in the map (remember we can have more + one cap with the same regno in a region). */ + ira_allocno_t *regno_allocno_map; + + /* Maximal register pressure inside loop for given register class + (defined only for the cover classes). */ + int reg_pressure[N_REG_CLASSES]; + + /* Numbers of allocnos referred in the loop node. */ + bitmap mentioned_allocnos; + + /* Regnos of pseudos modified in the loop node (including its + subloops). */ + bitmap modified_regnos; + + /* Numbers of allocnos living at the loop borders. */ + bitmap border_allocnos; + + /* Numbers of copies referred in the corresponding loop. */ + bitmap local_copies; +}; + +/* The root of the loop tree corresponding to the all function. */ +extern ira_loop_tree_node_t ira_loop_tree_root; + +/* Height of the loop tree. */ +extern int ira_loop_tree_height; + +/* All nodes representing basic blocks are referred through the + following array. We can not use basic block member `aux' for this + because it is used for insertion of insns on edges. */ +extern ira_loop_tree_node_t ira_bb_nodes; + +/* Two access macros to the nodes representing basic blocks. 
*/ +#if defined ENABLE_IRA_CHECKING && (GCC_VERSION >= 2007) +#define IRA_BB_NODE_BY_INDEX(index) __extension__ \ +(({ ira_loop_tree_node_t _node = (&ira_bb_nodes[index]); \ + if (_node->children != NULL || _node->loop != NULL || _node->bb == NULL)\ + { \ + fprintf (stderr, \ + "\n%s: %d: error in %s: it is not a block node\n", \ + __FILE__, __LINE__, __FUNCTION__); \ + gcc_unreachable (); \ + } \ + _node; })) +#else +#define IRA_BB_NODE_BY_INDEX(index) (&ira_bb_nodes[index]) +#endif + +#define IRA_BB_NODE(bb) IRA_BB_NODE_BY_INDEX ((bb)->index) + +/* All nodes representing loops are referred through the following + array. */ +extern ira_loop_tree_node_t ira_loop_nodes; + +/* Two access macros to the nodes representing loops. */ +#if defined ENABLE_IRA_CHECKING && (GCC_VERSION >= 2007) +#define IRA_LOOP_NODE_BY_INDEX(index) __extension__ \ +(({ ira_loop_tree_node_t const _node = (&ira_loop_nodes[index]);\ + if (_node->children == NULL || _node->bb != NULL || _node->loop == NULL)\ + { \ + fprintf (stderr, \ + "\n%s: %d: error in %s: it is not a loop node\n", \ + __FILE__, __LINE__, __FUNCTION__); \ + gcc_unreachable (); \ + } \ + _node; })) +#else +#define IRA_LOOP_NODE_BY_INDEX(index) (&ira_loop_nodes[index]) +#endif + +#define IRA_LOOP_NODE(loop) IRA_LOOP_NODE_BY_INDEX ((loop)->num) + + + +/* The structure describes program points where a given allocno lives. + To save memory we store allocno conflicts only for the same cover + class allocnos which is enough to assign hard registers. To find + conflicts for other allocnos (e.g. to assign stack memory slot) we + use the live ranges. If the live ranges of two allocnos are + intersected, the allocnos are in conflict. */ +struct ira_allocno_live_range +{ + /* Allocno whose live range is described by given structure. */ + ira_allocno_t allocno; + /* Program point range. */ + int start, finish; + /* Next structure describing program points where the allocno + lives. 
*/ + allocno_live_range_t next; + /* Pointer to structures with the same start/finish. */ + allocno_live_range_t start_next, finish_next; +}; + +/* Program points are enumerated by numbers from range + 0..IRA_MAX_POINT-1. There are approximately two times more program + points than insns. Program points are places in the program where + liveness info can be changed. In most general case (there are more + complicated cases too) some program points correspond to places + where input operand dies and other ones correspond to places where + output operands are born. */ +extern int ira_max_point; + +/* Arrays of size IRA_MAX_POINT mapping a program point to the allocno + live ranges with given start/finish point. */ +extern allocno_live_range_t *ira_start_point_ranges, *ira_finish_point_ranges; + +/* A structure representing an allocno (allocation entity). Allocno + represents a pseudo-register in an allocation region. If + pseudo-register does not live in a region but it lives in the + nested regions, it is represented in the region by special allocno + called *cap*. There may be more one cap representing the same + pseudo-register in region. It means that the corresponding + pseudo-register lives in more one non-intersected subregion. */ +struct ira_allocno +{ + /* The allocno order number starting with 0. Each allocno has an + unique number and the number is never changed for the + allocno. */ + int num; + /* Regno for allocno or cap. */ + int regno; + /* Mode of the allocno which is the mode of the corresponding + pseudo-register. */ + enum machine_mode mode; + /* Final rtx representation of the allocno. */ + rtx reg; + /* Hard register assigned to given allocno. Negative value means + that memory was allocated to the allocno. During the reload, + spilled allocno has value equal to the corresponding stack slot + number (0, ...) - 2. 
Value -1 is used for allocnos spilled by the + reload (at this point pseudo-register has only one allocno) which + did not get stack slot yet. */ + int hard_regno; + /* Allocnos with the same regno are linked by the following member. + Allocnos corresponding to inner loops are first in the list (it + corresponds to depth-first traverse of the loops). */ + ira_allocno_t next_regno_allocno; + /* There may be different allocnos with the same regno in different + regions. Allocnos are bound to the corresponding loop tree node. + Pseudo-register may have only one regular allocno with given loop + tree node but more than one cap (see comments above). */ + ira_loop_tree_node_t loop_tree_node; + /* Accumulated usage references of the allocno. Here and below, + word 'accumulated' means info for given region and all nested + subregions. In this case, 'accumulated' means sum of references + of the corresponding pseudo-register in this region and in all + nested subregions recursively. */ + int nrefs; + /* Accumulated frequency of usage of the allocno. */ + int freq; + /* Register class which should be used for allocation for given + allocno. NO_REGS means that we should use memory. */ + enum reg_class cover_class; + /* Minimal accumulated cost of usage register of the cover class for + the allocno. */ + int cover_class_cost; + /* Minimal accumulated, and updated costs of memory for the allocno. + At the allocation start, the original and updated costs are + equal. The updated cost may be changed after finishing + allocation in a region and starting allocation in a subregion. + The change reflects the cost of spill/restore code on the + subregion border if we assign memory to the pseudo in the + subregion. */ + int memory_cost, updated_memory_cost; + /* Accumulated number of points where the allocno lives and there is + excess pressure for its class. 
Excess pressure for a register + class at some point means that there are more allocnos of given + register class living at the point than number of hard-registers + of the class available for the allocation. */ + int excess_pressure_points_num; + /* Copies to other non-conflicting allocnos. The copies can + represent move insn or potential move insn usually because of two + operand insn constraints. */ + ira_copy_t allocno_copies; + /* It is a allocno (cap) representing given allocno on upper loop tree + level. */ + ira_allocno_t cap; + /* It is a link to allocno (cap) on lower loop level represented by + given cap. Null if given allocno is not a cap. */ + ira_allocno_t cap_member; + /* Coalesced allocnos form a cyclic list. One allocno given by + FIRST_COALESCED_ALLOCNO represents all coalesced allocnos. The + list is chained by NEXT_COALESCED_ALLOCNO. */ + ira_allocno_t first_coalesced_allocno; + ira_allocno_t next_coalesced_allocno; + /* Pointer to structures describing at what program point the + allocno lives. We always maintain the list in such way that *the + ranges in the list are not intersected and ordered by decreasing + their program points*. */ + allocno_live_range_t live_ranges; + /* Before building conflicts the two member values are + correspondingly minimal and maximal points of the accumulated + allocno live ranges. After building conflicts the values are + correspondingly minimal and maximal conflict ids of allocnos with + which given allocno can conflict. */ + int min, max; + /* The unique member value represents given allocno in conflict bit + vectors. */ + int conflict_id; + /* Vector of accumulated conflicting allocnos with NULL end marker + (if CONFLICT_VEC_P is true) or conflict bit vector otherwise. + Only allocnos with the same cover class are in the vector or in + the bit vector. */ + void *conflict_allocno_array; + /* Allocated size of the previous array. 
*/ + unsigned int conflict_allocno_array_size; + /* Number of accumulated conflicts in the vector of conflicting + allocnos. */ + int conflict_allocnos_num; + /* Initial and accumulated hard registers conflicting with this + allocno and as a consequences can not be assigned to the allocno. + All non-allocatable hard regs and hard regs of cover classes + different from given allocno one are included in the sets. */ + HARD_REG_SET conflict_hard_regs, total_conflict_hard_regs; + /* Accumulated frequency of calls which given allocno + intersects. */ + int call_freq; + /* Length of the previous array (number of the intersected calls). */ + int calls_crossed_num; + /* Non NULL if we remove restoring value from given allocno to + MEM_OPTIMIZED_DEST at loop exit (see ira-emit.c) because the + allocno value is not changed inside the loop. */ + ira_allocno_t mem_optimized_dest; + /* TRUE if the allocno assigned to memory was a destination of + removed move (see ira-emit.c) at loop exit because the value of + the corresponding pseudo-register is not changed inside the + loop. */ + unsigned int mem_optimized_dest_p : 1; + /* TRUE if the corresponding pseudo-register has disjoint live + ranges and the other allocnos of the pseudo-register except this + one changed REG. */ + unsigned int somewhere_renamed_p : 1; + /* TRUE if allocno with the same REGNO in a subregion has been + renamed, in other words, got a new pseudo-register. */ + unsigned int child_renamed_p : 1; + /* During the reload, value TRUE means that we should not reassign a + hard register to the allocno got memory earlier. It is set up + when we removed memory-memory move insn before each iteration of + the reload. */ + unsigned int dont_reassign_p : 1; +#ifdef STACK_REGS + /* Set to TRUE if allocno can't be assigned to the stack hard + register correspondingly in this region and area including the + region and all its subregions recursively. 
*/ + unsigned int no_stack_reg_p : 1, total_no_stack_reg_p : 1; +#endif + /* TRUE value means that the allocno was not removed yet from the + conflicting graph during colouring. */ + unsigned int in_graph_p : 1; + /* TRUE if a hard register or memory has been assigned to the + allocno. */ + unsigned int assigned_p : 1; + /* TRUE if it is put on the stack to make other allocnos + colorable. */ + unsigned int may_be_spilled_p : 1; + /* TRUE if the allocno was removed from the splay tree used to + choose allocn for spilling (see ira-color.c::. */ + unsigned int splay_removed_p : 1; + /* TRUE if conflicts for given allocno are represented by vector of + pointers to the conflicting allocnos. Otherwise, we use a bit + vector where a bit with given index represents allocno with the + same number. */ + unsigned int conflict_vec_p : 1; + /* Array of usage costs (accumulated and the one updated during + coloring) for each hard register of the allocno cover class. The + member value can be NULL if all costs are the same and equal to + COVER_CLASS_COST. For example, the costs of two different hard + registers can be different if one hard register is callee-saved + and another one is callee-used and the allocno lives through + calls. Another example can be case when for some insn the + corresponding pseudo-register value should be put in specific + register class (e.g. AREG for x86) which is a strict subset of + the allocno cover class (GENERAL_REGS for x86). We have updated + costs to reflect the situation when the usage cost of a hard + register is decreased because the allocno is connected to another + allocno by a copy and the another allocno has been assigned to + the hard register. */ + int *hard_reg_costs, *updated_hard_reg_costs; + /* Array of decreasing costs (accumulated and the one updated during + coloring) for allocnos conflicting with given allocno for hard + regno of the allocno cover class. The member value can be NULL + if all costs are the same. 
These costs are used to reflect + preferences of other allocnos not assigned yet during assigning + to given allocno. */ + int *conflict_hard_reg_costs, *updated_conflict_hard_reg_costs; + /* Number of the same cover class allocnos with TRUE in_graph_p + value and conflicting with given allocno during each point of + graph coloring. */ + int left_conflicts_num; + /* Number of hard registers of the allocno cover class really + available for the allocno allocation. */ + int available_regs_num; + /* Allocnos in a bucket (used in coloring) chained by the following + two members. */ + ira_allocno_t next_bucket_allocno; + ira_allocno_t prev_bucket_allocno; + /* Used for temporary purposes. */ + int temp; +}; + +/* All members of the allocno structures should be accessed only + through the following macros. */ +#define ALLOCNO_NUM(A) ((A)->num) +#define ALLOCNO_REGNO(A) ((A)->regno) +#define ALLOCNO_REG(A) ((A)->reg) +#define ALLOCNO_NEXT_REGNO_ALLOCNO(A) ((A)->next_regno_allocno) +#define ALLOCNO_LOOP_TREE_NODE(A) ((A)->loop_tree_node) +#define ALLOCNO_CAP(A) ((A)->cap) +#define ALLOCNO_CAP_MEMBER(A) ((A)->cap_member) +#define ALLOCNO_CONFLICT_ALLOCNO_ARRAY(A) ((A)->conflict_allocno_array) +#define ALLOCNO_CONFLICT_ALLOCNO_ARRAY_SIZE(A) \ + ((A)->conflict_allocno_array_size) +#define ALLOCNO_CONFLICT_ALLOCNOS_NUM(A) \ + ((A)->conflict_allocnos_num) +#define ALLOCNO_CONFLICT_HARD_REGS(A) ((A)->conflict_hard_regs) +#define ALLOCNO_TOTAL_CONFLICT_HARD_REGS(A) ((A)->total_conflict_hard_regs) +#define ALLOCNO_NREFS(A) ((A)->nrefs) +#define ALLOCNO_FREQ(A) ((A)->freq) +#define ALLOCNO_HARD_REGNO(A) ((A)->hard_regno) +#define ALLOCNO_CALL_FREQ(A) ((A)->call_freq) +#define ALLOCNO_CALLS_CROSSED_NUM(A) ((A)->calls_crossed_num) +#define ALLOCNO_MEM_OPTIMIZED_DEST(A) ((A)->mem_optimized_dest) +#define ALLOCNO_MEM_OPTIMIZED_DEST_P(A) ((A)->mem_optimized_dest_p) +#define ALLOCNO_SOMEWHERE_RENAMED_P(A) ((A)->somewhere_renamed_p) +#define ALLOCNO_CHILD_RENAMED_P(A) 
((A)->child_renamed_p) +#define ALLOCNO_DONT_REASSIGN_P(A) ((A)->dont_reassign_p) +#ifdef STACK_REGS +#define ALLOCNO_NO_STACK_REG_P(A) ((A)->no_stack_reg_p) +#define ALLOCNO_TOTAL_NO_STACK_REG_P(A) ((A)->total_no_stack_reg_p) +#endif +#define ALLOCNO_IN_GRAPH_P(A) ((A)->in_graph_p) +#define ALLOCNO_ASSIGNED_P(A) ((A)->assigned_p) +#define ALLOCNO_MAY_BE_SPILLED_P(A) ((A)->may_be_spilled_p) +#define ALLOCNO_SPLAY_REMOVED_P(A) ((A)->splay_removed_p) +#define ALLOCNO_CONFLICT_VEC_P(A) ((A)->conflict_vec_p) +#define ALLOCNO_MODE(A) ((A)->mode) +#define ALLOCNO_COPIES(A) ((A)->allocno_copies) +#define ALLOCNO_HARD_REG_COSTS(A) ((A)->hard_reg_costs) +#define ALLOCNO_UPDATED_HARD_REG_COSTS(A) ((A)->updated_hard_reg_costs) +#define ALLOCNO_CONFLICT_HARD_REG_COSTS(A) \ + ((A)->conflict_hard_reg_costs) +#define ALLOCNO_UPDATED_CONFLICT_HARD_REG_COSTS(A) \ + ((A)->updated_conflict_hard_reg_costs) +#define ALLOCNO_LEFT_CONFLICTS_NUM(A) ((A)->left_conflicts_num) +#define ALLOCNO_COVER_CLASS(A) ((A)->cover_class) +#define ALLOCNO_COVER_CLASS_COST(A) ((A)->cover_class_cost) +#define ALLOCNO_MEMORY_COST(A) ((A)->memory_cost) +#define ALLOCNO_UPDATED_MEMORY_COST(A) ((A)->updated_memory_cost) +#define ALLOCNO_EXCESS_PRESSURE_POINTS_NUM(A) ((A)->excess_pressure_points_num) +#define ALLOCNO_AVAILABLE_REGS_NUM(A) ((A)->available_regs_num) +#define ALLOCNO_NEXT_BUCKET_ALLOCNO(A) ((A)->next_bucket_allocno) +#define ALLOCNO_PREV_BUCKET_ALLOCNO(A) ((A)->prev_bucket_allocno) +#define IRA_ALLOCNO_TEMP(A) ((A)->temp) +#define ALLOCNO_FIRST_COALESCED_ALLOCNO(A) ((A)->first_coalesced_allocno) +#define ALLOCNO_NEXT_COALESCED_ALLOCNO(A) ((A)->next_coalesced_allocno) +#define ALLOCNO_LIVE_RANGES(A) ((A)->live_ranges) +#define ALLOCNO_MIN(A) ((A)->min) +#define ALLOCNO_MAX(A) ((A)->max) +#define ALLOCNO_CONFLICT_ID(A) ((A)->conflict_id) + +/* Map regno -> allocnos with given regno (see comments for + allocno member `next_regno_allocno'). 
*/ +extern ira_allocno_t *ira_regno_allocno_map; + +/* Array of references to all allocnos. The order number of the + allocno corresponds to the index in the array. Removed allocnos + have NULL element value. */ +extern ira_allocno_t *ira_allocnos; + +/* Sizes of the previous array. */ +extern int ira_allocnos_num; + +/* Map conflict id -> allocno with given conflict id (see comments for + allocno member `conflict_id'). */ +extern ira_allocno_t *ira_conflict_id_allocno_map; + +/* The following structure represents a copy of two allocnos. The + copies represent move insns or potential move insns usually because + of two operand insn constraints. To remove register shuffle, we + also create copies between allocno which is output of an insn and + allocno becoming dead in the insn. */ +struct ira_allocno_copy +{ + /* The unique order number of the copy node starting with 0. */ + int num; + /* Allocnos connected by the copy. The first allocno should have + smaller order number than the second one. */ + ira_allocno_t first, second; + /* Execution frequency of the copy. */ + int freq; + /* It is a move insn which is an origin of the copy. The member + value for the copy representing two operand insn constraints or + for the copy created to remove register shuffle is NULL. In last + case the copy frequency is smaller than the corresponding insn + execution frequency. */ + rtx insn; + /* All copies with the same allocno as FIRST are linked by the two + following members. */ + ira_copy_t prev_first_allocno_copy, next_first_allocno_copy; + /* All copies with the same allocno as SECOND are linked by the two + following members. */ + ira_copy_t prev_second_allocno_copy, next_second_allocno_copy; + /* Region from which given copy is originated. */ + ira_loop_tree_node_t loop_tree_node; +}; + +/* Array of references to all copies. The order number of the copy + corresponds to the index in the array. Removed copies have NULL + element value. 
*/ +extern ira_copy_t *ira_copies; + +/* Size of the previous array. */ +extern int ira_copies_num; + +/* The following structure describes a stack slot used for spilled + pseudo-registers. */ +struct ira_spilled_reg_stack_slot +{ + /* pseudo-registers assigned to the stack slot. */ + regset_head spilled_regs; + /* RTL representation of the stack slot. */ + rtx mem; + /* Size of the stack slot. */ + unsigned int width; +}; + +/* The number of elements in the following array. */ +extern int ira_spilled_reg_stack_slots_num; + +/* The following array contains info about spilled pseudo-registers + stack slots used in current function so far. */ +extern struct ira_spilled_reg_stack_slot *ira_spilled_reg_stack_slots; + +/* Correspondingly overall cost of the allocation, cost of the + allocnos assigned to hard-registers, cost of the allocnos assigned + to memory, cost of loads, stores and register move insns generated + for pseudo-register live range splitting (see ira-emit.c). */ +extern int ira_overall_cost; +extern int ira_reg_cost, ira_mem_cost; +extern int ira_load_cost, ira_store_cost, ira_shuffle_cost; +extern int ira_move_loops_num, ira_additional_jumps_num; + +/* Map: register class x machine mode -> number of hard registers of + given class needed to store value of given mode. If the number for + some hard-registers of the register class is different, the size + will be negative. */ +extern int ira_reg_class_nregs[N_REG_CLASSES][MAX_MACHINE_MODE]; + +/* Maximal value of the previous array elements. */ +extern int ira_max_nregs; + +/* The number of bits in each element of array used to implement a bit + vector of allocnos and what type that element has. We use the + largest integer format on the host machine. */ +#define IRA_INT_BITS HOST_BITS_PER_WIDE_INT +#define IRA_INT_TYPE HOST_WIDE_INT + +/* Set, clear or test bit number I in R, a bit vector of elements with + minimal index and maximal index equal correspondingly to MIN and + MAX. 
*/ +#if defined ENABLE_IRA_CHECKING && (GCC_VERSION >= 2007) + +#define SET_ALLOCNO_SET_BIT(R, I, MIN, MAX) __extension__ \ + (({ int _min = (MIN), _max = (MAX), _i = (I); \ + if (_i < _min || _i > _max) \ + { \ + fprintf (stderr, \ + "\n%s: %d: error in %s: %d not in range [%d,%d]\n", \ + __FILE__, __LINE__, __FUNCTION__, _i, _min, _max); \ + gcc_unreachable (); \ + } \ + ((R)[(unsigned) (_i - _min) / IRA_INT_BITS] \ + |= ((IRA_INT_TYPE) 1 << ((unsigned) (_i - _min) % IRA_INT_BITS))); })) + + +#define CLEAR_ALLOCNO_SET_BIT(R, I, MIN, MAX) __extension__ \ + (({ int _min = (MIN), _max = (MAX), _i = (I); \ + if (_i < _min || _i > _max) \ + { \ + fprintf (stderr, \ + "\n%s: %d: error in %s: %d not in range [%d,%d]\n", \ + __FILE__, __LINE__, __FUNCTION__, _i, _min, _max); \ + gcc_unreachable (); \ + } \ + ((R)[(unsigned) (_i - _min) / IRA_INT_BITS] \ + &= ~((IRA_INT_TYPE) 1 << ((unsigned) (_i - _min) % IRA_INT_BITS))); })) + +#define TEST_ALLOCNO_SET_BIT(R, I, MIN, MAX) __extension__ \ + (({ int _min = (MIN), _max = (MAX), _i = (I); \ + if (_i < _min || _i > _max) \ + { \ + fprintf (stderr, \ + "\n%s: %d: error in %s: %d not in range [%d,%d]\n", \ + __FILE__, __LINE__, __FUNCTION__, _i, _min, _max); \ + gcc_unreachable (); \ + } \ + ((R)[(unsigned) (_i - _min) / IRA_INT_BITS] \ + & ((IRA_INT_TYPE) 1 << ((unsigned) (_i - _min) % IRA_INT_BITS))); })) + +#else + +#define SET_ALLOCNO_SET_BIT(R, I, MIN, MAX) \ + ((R)[(unsigned) ((I) - (MIN)) / IRA_INT_BITS] \ + |= ((IRA_INT_TYPE) 1 << ((unsigned) ((I) - (MIN)) % IRA_INT_BITS))) + +#define CLEAR_ALLOCNO_SET_BIT(R, I, MIN, MAX) \ + ((R)[(unsigned) ((I) - (MIN)) / IRA_INT_BITS] \ + &= ~((IRA_INT_TYPE) 1 << ((unsigned) ((I) - (MIN)) % IRA_INT_BITS))) + +#define TEST_ALLOCNO_SET_BIT(R, I, MIN, MAX) \ + ((R)[(unsigned) ((I) - (MIN)) / IRA_INT_BITS] \ + & ((IRA_INT_TYPE) 1 << ((unsigned) ((I) - (MIN)) % IRA_INT_BITS))) + +#endif + +/* The iterator for allocno set implemented ed as allocno bit + vector. 
*/ +typedef struct { + + /* Array containing the allocno bit vector. */ + IRA_INT_TYPE *vec; + + /* The number of the current element in the vector. */ + unsigned int word_num; + + /* The number of bits in the bit vector. */ + unsigned int nel; + + /* The current bit index of the bit vector. */ + unsigned int bit_num; + + /* Index corresponding to the 1st bit of the bit vector. */ + int start_val; + + /* The word of the bit vector currently visited. */ + unsigned IRA_INT_TYPE word; +} ira_allocno_set_iterator; + +/* Initialize the iterator I for allocnos bit vector VEC containing + minimal and maximal values MIN and MAX. */ +static inline void +ira_allocno_set_iter_init (ira_allocno_set_iterator *i, + IRA_INT_TYPE *vec, int min, int max) +{ + i->vec = vec; + i->word_num = 0; + i->nel = max < min ? 0 : max - min + 1; + i->start_val = min; + i->bit_num = 0; + i->word = i->nel == 0 ? 0 : vec[0]; +} + +/* Return TRUE if we have more allocnos to visit, in which case *N is + set to the allocno number to be visited. Otherwise, return + FALSE. */ +static inline bool +ira_allocno_set_iter_cond (ira_allocno_set_iterator *i, int *n) +{ + /* Skip words that are zeros. */ + for (; i->word == 0; i->word = i->vec[i->word_num]) + { + i->word_num++; + i->bit_num = i->word_num * IRA_INT_BITS; + + /* If we have reached the end, break. */ + if (i->bit_num >= i->nel) + return false; + } + + /* Skip bits that are zero. */ + for (; (i->word & 1) == 0; i->word >>= 1) + i->bit_num++; + + *n = (int) i->bit_num + i->start_val; + + return true; +} + +/* Advance to the next allocno in the set. */ +static inline void +ira_allocno_set_iter_next (ira_allocno_set_iterator *i) +{ + i->word >>= 1; + i->bit_num++; +} + +/* Loop over all elements of allocno set given by bit vector VEC and + their minimal and maximal values MIN and MAX. In each iteration, N + is set to the number of next allocno. ITER is an instance of + ira_allocno_set_iterator used to iterate the allocnos in the set. 
*/ +#define FOR_EACH_ALLOCNO_IN_SET(VEC, MIN, MAX, N, ITER) \ + for (ira_allocno_set_iter_init (&(ITER), (VEC), (MIN), (MAX)); \ + ira_allocno_set_iter_cond (&(ITER), &(N)); \ + ira_allocno_set_iter_next (&(ITER))) + +/* ira.c: */ + +/* Hard regsets whose all bits are correspondingly zero or one. */ +extern HARD_REG_SET ira_zero_hard_reg_set; +extern HARD_REG_SET ira_one_hard_reg_set; + +/* Map: hard regs X modes -> set of hard registers for storing value + of given mode starting with given hard register. */ +extern HARD_REG_SET ira_reg_mode_hard_regset + [FIRST_PSEUDO_REGISTER][NUM_MACHINE_MODES]; + +/* Arrays analogous to macros MEMORY_MOVE_COST and + REGISTER_MOVE_COST. */ +extern short ira_memory_move_cost[MAX_MACHINE_MODE][N_REG_CLASSES][2]; +extern move_table *ira_register_move_cost[MAX_MACHINE_MODE]; + +/* Similar to may_move_in_cost but it is calculated in IRA instead of + regclass. Another difference we take only available hard registers + into account to figure out that one register class is a subset of + the another one. */ +extern move_table *ira_may_move_in_cost[MAX_MACHINE_MODE]; + +/* Similar to may_move_out_cost but it is calculated in IRA instead of + regclass. Another difference we take only available hard registers + into account to figure out that one register class is a subset of + the another one. */ +extern move_table *ira_may_move_out_cost[MAX_MACHINE_MODE]; + +/* Register class subset relation: TRUE if the first class is a subset + of the second one considering only hard registers available for the + allocation. */ +extern int ira_class_subset_p[N_REG_CLASSES][N_REG_CLASSES]; + +/* Array of number of hard registers of given class which are + available for the allocation. The order is defined by the + allocation order. */ +extern short ira_class_hard_regs[N_REG_CLASSES][FIRST_PSEUDO_REGISTER]; + +/* The number of elements of the above array for given register + class. 
*/ +extern int ira_class_hard_regs_num[N_REG_CLASSES]; + +/* Index (in ira_class_hard_regs) for given register class and hard + register (in general case a hard register can belong to several + register classes). The index is negative for hard registers + unavailable for the allocation. */ +extern short ira_class_hard_reg_index[N_REG_CLASSES][FIRST_PSEUDO_REGISTER]; + +/* Function specific hard registers can not be used for the register + allocation. */ +extern HARD_REG_SET ira_no_alloc_regs; + +/* Number of given class hard registers available for the register + allocation for given classes. */ +extern int ira_available_class_regs[N_REG_CLASSES]; + +/* Array whose values are hard regset of hard registers available for + the allocation of given register class whose HARD_REGNO_MODE_OK + values for given mode are zero. */ +extern HARD_REG_SET prohibited_class_mode_regs + [N_REG_CLASSES][NUM_MACHINE_MODES]; + +/* Array whose values are hard regset of hard registers for which + move of the hard register in given mode into itself is + prohibited. */ +extern HARD_REG_SET ira_prohibited_mode_move_regs[NUM_MACHINE_MODES]; + +/* Number of cover classes. Cover classes are non-intersecting register + classes containing all hard-registers available for the + allocation. */ +extern int ira_reg_class_cover_size; + +/* The array containing cover classes (see also comments for macro + IRA_COVER_CLASSES). Only first IRA_REG_CLASS_COVER_SIZE elements are + used for this. */ +extern enum reg_class ira_reg_class_cover[N_REG_CLASSES]; + +/* The value is number of elements in the subsequent array. */ +extern int ira_important_classes_num; + +/* The array containing non-empty classes (including non-empty cover + classes) which are subclasses of cover classes. Such classes are + important for calculation of the hard register usage costs. */ +extern enum reg_class ira_important_classes[N_REG_CLASSES]; + +/* The array containing indexes of important classes in the previous + array. 
The array elements are defined only for important + classes. */ +extern int ira_important_class_nums[N_REG_CLASSES]; + +/* Map of all register classes to corresponding cover class containing + the given class. If given class is not a subset of a cover class, + we translate it into the cheapest cover class. */ +extern enum reg_class ira_class_translate[N_REG_CLASSES]; + +/* The biggest important class inside of intersection of the two + classes (that is calculated taking only hard registers available + for allocation into account). If both classes contain no hard + registers available for allocation, the value is calculated + taking all hard-registers including fixed ones into account. */ +extern enum reg_class ira_reg_class_intersect[N_REG_CLASSES][N_REG_CLASSES]; + +/* The biggest important class inside of union of the two classes + (that is calculated taking only hard registers available for + allocation into account). If both classes contain no hard + registers available for allocation, the value is calculated + taking all hard-registers including fixed ones into account. In + other words, the value is the corresponding reg_class_subunion + value. */ +extern enum reg_class ira_reg_class_union[N_REG_CLASSES][N_REG_CLASSES]; + +extern void *ira_allocate (size_t); +extern void *ira_reallocate (void *, size_t); +extern void ira_free (void *addr); +extern bitmap ira_allocate_bitmap (void); +extern void ira_free_bitmap (bitmap); +extern void ira_print_disposition (FILE *); +extern void ira_debug_disposition (void); +extern void ira_debug_class_cover (void); +extern void ira_init_register_move_cost (enum machine_mode); + +/* The length of the two following arrays. */ +extern int ira_reg_equiv_len; + +/* The element value is TRUE if the corresponding regno value is + invariant. */ +extern bool *ira_reg_equiv_invariant_p; + +/* The element value is equiv constant of given pseudo-register or + NULL_RTX. 
*/ +extern rtx *ira_reg_equiv_const; + +/* ira-build.c */ + +/* The current loop tree node and its regno allocno map. */ +extern ira_loop_tree_node_t ira_curr_loop_tree_node; +extern ira_allocno_t *ira_curr_regno_allocno_map; + +extern void ira_debug_allocno_copies (ira_allocno_t); + +extern void ira_traverse_loop_tree (bool, ira_loop_tree_node_t, + void (*) (ira_loop_tree_node_t), + void (*) (ira_loop_tree_node_t)); +extern ira_allocno_t ira_create_allocno (int, bool, ira_loop_tree_node_t); +extern void ira_set_allocno_cover_class (ira_allocno_t, enum reg_class); +extern bool ira_conflict_vector_profitable_p (ira_allocno_t, int); +extern void ira_allocate_allocno_conflict_vec (ira_allocno_t, int); +extern void ira_allocate_allocno_conflicts (ira_allocno_t, int); +extern void ira_add_allocno_conflict (ira_allocno_t, ira_allocno_t); +extern void ira_print_expanded_allocno (ira_allocno_t); +extern allocno_live_range_t ira_create_allocno_live_range + (ira_allocno_t, int, int, allocno_live_range_t); +extern void ira_finish_allocno_live_range (allocno_live_range_t); +extern void ira_free_allocno_updated_costs (ira_allocno_t); +extern ira_copy_t ira_create_copy (ira_allocno_t, ira_allocno_t, + int, rtx, ira_loop_tree_node_t); +extern void ira_add_allocno_copy_to_list (ira_copy_t); +extern void ira_swap_allocno_copy_ends_if_necessary (ira_copy_t); +extern void ira_remove_allocno_copy_from_list (ira_copy_t); +extern ira_copy_t ira_add_allocno_copy (ira_allocno_t, ira_allocno_t, int, rtx, + ira_loop_tree_node_t); + +extern int *ira_allocate_cost_vector (enum reg_class); +extern void ira_free_cost_vector (int *, enum reg_class); + +extern void ira_flattening (int, int); +extern bool ira_build (bool); +extern void ira_destroy (void); + +/* ira-costs.c */ +extern void ira_init_costs_once (void); +extern void ira_init_costs (void); +extern void ira_finish_costs_once (void); +extern void ira_costs (void); +extern void ira_tune_allocno_costs_and_cover_classes (void); + +/* 
ira-lives.c */ + +extern void ira_rebuild_start_finish_chains (void); +extern void ira_print_live_range_list (FILE *, allocno_live_range_t); +extern void ira_debug_live_range_list (allocno_live_range_t); +extern void ira_debug_allocno_live_ranges (ira_allocno_t); +extern void ira_debug_live_ranges (void); +extern void ira_create_allocno_live_ranges (void); +extern void ira_finish_allocno_live_ranges (void); + +/* ira-conflicts.c */ +extern bool ira_allocno_live_ranges_intersect_p (ira_allocno_t, ira_allocno_t); +extern bool ira_pseudo_live_ranges_intersect_p (int, int); +extern void ira_debug_conflicts (bool); +extern void ira_build_conflicts (void); + +/* ira-color.c */ +extern int ira_loop_edge_freq (ira_loop_tree_node_t, int, bool); +extern void ira_reassign_conflict_allocnos (int); +extern void ira_initiate_assign (void); +extern void ira_finish_assign (void); +extern void ira_color (void); +extern void ira_fast_allocation (void); + +/* ira-emit.c */ +extern void ira_emit (bool); + + + +/* The iterator for all allocnos. */ +typedef struct { + /* The number of the current element in IRA_ALLOCNOS. */ + int n; +} ira_allocno_iterator; + +/* Initialize the iterator I. */ +static inline void +ira_allocno_iter_init (ira_allocno_iterator *i) +{ + i->n = 0; +} + +/* Return TRUE if we have more allocnos to visit, in which case *A is + set to the allocno to be visited. Otherwise, return FALSE. */ +static inline bool +ira_allocno_iter_cond (ira_allocno_iterator *i, ira_allocno_t *a) +{ + int n; + + for (n = i->n; n < ira_allocnos_num; n++) + if (ira_allocnos[n] != NULL) + { + *a = ira_allocnos[n]; + i->n = n + 1; + return true; + } + return false; +} + +/* Loop over all allocnos. In each iteration, A is set to the next + allocno. ITER is an instance of ira_allocno_iterator used to iterate + the allocnos. */ +#define FOR_EACH_ALLOCNO(A, ITER) \ + for (ira_allocno_iter_init (&(ITER)); \ + ira_allocno_iter_cond (&(ITER), &(A));) + + + + +/* The iterator for copies. 
*/ +typedef struct { + /* The number of the current element in IRA_COPIES. */ + int n; +} ira_copy_iterator; + +/* Initialize the iterator I. */ +static inline void +ira_copy_iter_init (ira_copy_iterator *i) +{ + i->n = 0; +} + +/* Return TRUE if we have more copies to visit, in which case *CP is + set to the copy to be visited. Otherwise, return FALSE. */ +static inline bool +ira_copy_iter_cond (ira_copy_iterator *i, ira_copy_t *cp) +{ + int n; + + for (n = i->n; n < ira_copies_num; n++) + if (ira_copies[n] != NULL) + { + *cp = ira_copies[n]; + i->n = n + 1; + return true; + } + return false; +} + +/* Loop over all copies. In each iteration, C is set to the next + copy. ITER is an instance of ira_copy_iterator used to iterate + the copies. */ +#define FOR_EACH_COPY(C, ITER) \ + for (ira_copy_iter_init (&(ITER)); \ + ira_copy_iter_cond (&(ITER), &(C));) + + + + +/* The iterator for allocno conflicts. */ +typedef struct { + + /* TRUE if the conflicts are represented by vector of allocnos. */ + bool allocno_conflict_vec_p; + + /* The conflict vector or conflict bit vector. */ + void *vec; + + /* The number of the current element in the vector (of type + ira_allocno_t or IRA_INT_TYPE). */ + unsigned int word_num; + + /* The bit vector size. It is defined only if + ALLOCNO_CONFLICT_VEC_P is FALSE. */ + unsigned int size; + + /* The current bit index of bit vector. It is defined only if + ALLOCNO_CONFLICT_VEC_P is FALSE. */ + unsigned int bit_num; + + /* Allocno conflict id corresponding to the 1st bit of the bit + vector. It is defined only if ALLOCNO_CONFLICT_VEC_P is + FALSE. */ + int base_conflict_id; + + /* The word of bit vector currently visited. It is defined only if + ALLOCNO_CONFLICT_VEC_P is FALSE. */ + unsigned IRA_INT_TYPE word; +} ira_allocno_conflict_iterator; + +/* Initialize the iterator I with ALLOCNO conflicts. 
*/ +static inline void +ira_allocno_conflict_iter_init (ira_allocno_conflict_iterator *i, + ira_allocno_t allocno) +{ + i->allocno_conflict_vec_p = ALLOCNO_CONFLICT_VEC_P (allocno); + i->vec = ALLOCNO_CONFLICT_ALLOCNO_ARRAY (allocno); + i->word_num = 0; + if (i->allocno_conflict_vec_p) + i->size = i->bit_num = i->base_conflict_id = i->word = 0; + else + { + if (ALLOCNO_MIN (allocno) > ALLOCNO_MAX (allocno)) + i->size = 0; + else + i->size = ((ALLOCNO_MAX (allocno) - ALLOCNO_MIN (allocno) + + IRA_INT_BITS) + / IRA_INT_BITS) * sizeof (IRA_INT_TYPE); + i->bit_num = 0; + i->base_conflict_id = ALLOCNO_MIN (allocno); + i->word = (i->size == 0 ? 0 : ((IRA_INT_TYPE *) i->vec)[0]); + } +} + +/* Return TRUE if we have more conflicting allocnos to visit, in which + case *A is set to the allocno to be visited. Otherwise, return + FALSE. */ +static inline bool +ira_allocno_conflict_iter_cond (ira_allocno_conflict_iterator *i, + ira_allocno_t *a) +{ + ira_allocno_t conflict_allocno; + + if (i->allocno_conflict_vec_p) + { + conflict_allocno = ((ira_allocno_t *) i->vec)[i->word_num]; + if (conflict_allocno == NULL) + return false; + *a = conflict_allocno; + return true; + } + else + { + /* Skip words that are zeros. */ + for (; i->word == 0; i->word = ((IRA_INT_TYPE *) i->vec)[i->word_num]) + { + i->word_num++; + + /* If we have reached the end, break. */ + if (i->word_num * sizeof (IRA_INT_TYPE) >= i->size) + return false; + + i->bit_num = i->word_num * IRA_INT_BITS; + } + + /* Skip bits that are zero. */ + for (; (i->word & 1) == 0; i->word >>= 1) + i->bit_num++; + + *a = ira_conflict_id_allocno_map[i->bit_num + i->base_conflict_id]; + + return true; + } +} + +/* Advance to the next conflicting allocno. */ +static inline void +ira_allocno_conflict_iter_next (ira_allocno_conflict_iterator *i) +{ + if (i->allocno_conflict_vec_p) + i->word_num++; + else + { + i->word >>= 1; + i->bit_num++; + } +} + +/* Loop over all allocnos conflicting with ALLOCNO. 
In each + iteration, A is set to the next conflicting allocno. ITER is an + instance of ira_allocno_conflict_iterator used to iterate the + conflicts. */ +#define FOR_EACH_ALLOCNO_CONFLICT(ALLOCNO, A, ITER) \ + for (ira_allocno_conflict_iter_init (&(ITER), (ALLOCNO)); \ + ira_allocno_conflict_iter_cond (&(ITER), &(A)); \ + ira_allocno_conflict_iter_next (&(ITER))) + + + +/* The function returns TRUE if hard registers starting with + HARD_REGNO and containing value of MODE are not in set + HARD_REGSET. */ +static inline bool +ira_hard_reg_not_in_set_p (int hard_regno, enum machine_mode mode, + HARD_REG_SET hard_regset) +{ + int i; + + ira_assert (hard_regno >= 0); + for (i = hard_regno_nregs[hard_regno][mode] - 1; i >= 0; i--) + if (TEST_HARD_REG_BIT (hard_regset, hard_regno + i)) + return false; + return true; +} + + + +/* To save memory we use a lazy approach for allocation and + initialization of the cost vectors. We do this only when it is + really necessary. */ + +/* Allocate cost vector *VEC for hard registers of COVER_CLASS and + initialize the elements by VAL if it is necessary */ +static inline void +ira_allocate_and_set_costs (int **vec, enum reg_class cover_class, int val) +{ + int i, *reg_costs; + int len; + + if (*vec != NULL) + return; + *vec = reg_costs = ira_allocate_cost_vector (cover_class); + len = ira_class_hard_regs_num[cover_class]; + for (i = 0; i < len; i++) + reg_costs[i] = val; +} + +/* Allocate cost vector *VEC for hard registers of COVER_CLASS and + copy values of vector SRC into the vector if it is necessary */ +static inline void +ira_allocate_and_copy_costs (int **vec, enum reg_class cover_class, int *src) +{ + int len; + + if (*vec != NULL || src == NULL) + return; + *vec = ira_allocate_cost_vector (cover_class); + len = ira_class_hard_regs_num[cover_class]; + memcpy (*vec, src, sizeof (int) * len); +} + +/* Allocate cost vector *VEC for hard registers of COVER_CLASS and + add values of vector SRC into the vector if it is necessary */ 
+static inline void +ira_allocate_and_accumulate_costs (int **vec, enum reg_class cover_class, + int *src) +{ + int i, len; + + if (src == NULL) + return; + len = ira_class_hard_regs_num[cover_class]; + if (*vec == NULL) + { + *vec = ira_allocate_cost_vector (cover_class); + memset (*vec, 0, sizeof (int) * len); + } + for (i = 0; i < len; i++) + (*vec)[i] += src[i]; +} + +/* Allocate cost vector *VEC for hard registers of COVER_CLASS and + copy values of vector SRC into the vector or initialize it by VAL + (if SRC is null). */ +static inline void +ira_allocate_and_set_or_copy_costs (int **vec, enum reg_class cover_class, + int val, int *src) +{ + int i, *reg_costs; + int len; + + if (*vec != NULL) + return; + *vec = reg_costs = ira_allocate_cost_vector (cover_class); + len = ira_class_hard_regs_num[cover_class]; + if (src != NULL) + memcpy (reg_costs, src, sizeof (int) * len); + else + { + for (i = 0; i < len; i++) + reg_costs[i] = val; + } +} diff --git a/gcc/ira-lives.c b/gcc/ira-lives.c new file mode 100644 index 00000000000..7d6b29fedf1 --- /dev/null +++ b/gcc/ira-lives.c @@ -0,0 +1,967 @@ +/* IRA processing allocno lives to build allocno live ranges. + Copyright (C) 2006, 2007, 2008 + Free Software Foundation, Inc. + Contributed by Vladimir Makarov <vmakarov@redhat.com>. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. 
*/ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "regs.h" +#include "rtl.h" +#include "tm_p.h" +#include "target.h" +#include "flags.h" +#include "hard-reg-set.h" +#include "basic-block.h" +#include "insn-config.h" +#include "recog.h" +#include "toplev.h" +#include "params.h" +#include "df.h" +#include "sparseset.h" +#include "ira-int.h" + +/* The code in this file is similar to one in global but the code + works on the allocno basis and creates live ranges instead of + pseudo-register conflicts. */ + +/* Program points are enumerated by numbers from range + 0..IRA_MAX_POINT-1. There are approximately two times more program + points than insns. Program points are places in the program where + liveness info can be changed. In most general case (there are more + complicated cases too) some program points correspond to places + where input operand dies and other ones correspond to places where + output operands are born. */ +int ira_max_point; + +/* Arrays of size IRA_MAX_POINT mapping a program point to the allocno + live ranges with given start/finish point. */ +allocno_live_range_t *ira_start_point_ranges, *ira_finish_point_ranges; + +/* Number of the current program point. */ +static int curr_point; + +/* Point where register pressure excess started or -1 if there is no + register pressure excess. Excess pressure for a register class at + some point means that there are more allocnos of given register + class living at the point than number of hard-registers of the + class available for the allocation. It is defined only for cover + classes. */ +static int high_pressure_start_point[N_REG_CLASSES]; + +/* Allocnos live at current point in the scan. */ +static sparseset allocnos_live; + +/* Set of hard regs (except eliminable ones) currently live. */ +static HARD_REG_SET hard_regs_live; + +/* The loop tree node corresponding to the current basic block. 
*/ +static ira_loop_tree_node_t curr_bb_node; + +/* The function processing birth of register REGNO. It updates living + hard regs and conflict hard regs for living allocnos or starts a + new live range for the allocno corresponding to REGNO if it is + necessary. */ +static void +make_regno_born (int regno) +{ + unsigned int i; + ira_allocno_t a; + allocno_live_range_t p; + + if (regno < FIRST_PSEUDO_REGISTER) + { + SET_HARD_REG_BIT (hard_regs_live, regno); + EXECUTE_IF_SET_IN_SPARSESET (allocnos_live, i) + { + SET_HARD_REG_BIT (ALLOCNO_CONFLICT_HARD_REGS (ira_allocnos[i]), + regno); + SET_HARD_REG_BIT (ALLOCNO_TOTAL_CONFLICT_HARD_REGS (ira_allocnos[i]), + regno); + } + return; + } + a = ira_curr_regno_allocno_map[regno]; + if (a == NULL) + return; + if ((p = ALLOCNO_LIVE_RANGES (a)) == NULL + || (p->finish != curr_point && p->finish + 1 != curr_point)) + ALLOCNO_LIVE_RANGES (a) + = ira_create_allocno_live_range (a, curr_point, -1, + ALLOCNO_LIVE_RANGES (a)); +} + +/* Update ALLOCNO_EXCESS_PRESSURE_POINTS_NUM for allocno A. */ +static void +update_allocno_pressure_excess_length (ira_allocno_t a) +{ + int start; + enum reg_class cover_class; + allocno_live_range_t p; + + cover_class = ALLOCNO_COVER_CLASS (a); + if (high_pressure_start_point[cover_class] < 0) + return; + p = ALLOCNO_LIVE_RANGES (a); + ira_assert (p != NULL); + start = (high_pressure_start_point[cover_class] > p->start + ? high_pressure_start_point[cover_class] : p->start); + ALLOCNO_EXCESS_PRESSURE_POINTS_NUM (a) += curr_point - start + 1; +} + +/* Process the death of register REGNO. This updates hard_regs_live + or finishes the current live range for the allocno corresponding to + REGNO. 
*/ +static void +make_regno_dead (int regno) +{ + ira_allocno_t a; + allocno_live_range_t p; + + if (regno < FIRST_PSEUDO_REGISTER) + { + CLEAR_HARD_REG_BIT (hard_regs_live, regno); + return; + } + a = ira_curr_regno_allocno_map[regno]; + if (a == NULL) + return; + p = ALLOCNO_LIVE_RANGES (a); + ira_assert (p != NULL); + p->finish = curr_point; + update_allocno_pressure_excess_length (a); +} + +/* Process the birth and, right after then, death of register + REGNO. */ +static void +make_regno_born_and_dead (int regno) +{ + make_regno_born (regno); + make_regno_dead (regno); +} + +/* The current register pressures for each cover class for the current + basic block. */ +static int curr_reg_pressure[N_REG_CLASSES]; + +/* Mark allocno A as currently living and update current register + pressure, maximal register pressure for the current BB, start point + of the register pressure excess, and conflicting hard registers of + A. */ +static void +set_allocno_live (ira_allocno_t a) +{ + int nregs; + enum reg_class cover_class; + + if (sparseset_bit_p (allocnos_live, ALLOCNO_NUM (a))) + return; + sparseset_set_bit (allocnos_live, ALLOCNO_NUM (a)); + IOR_HARD_REG_SET (ALLOCNO_CONFLICT_HARD_REGS (a), hard_regs_live); + IOR_HARD_REG_SET (ALLOCNO_TOTAL_CONFLICT_HARD_REGS (a), hard_regs_live); + cover_class = ALLOCNO_COVER_CLASS (a); + nregs = ira_reg_class_nregs[cover_class][ALLOCNO_MODE (a)]; + curr_reg_pressure[cover_class] += nregs; + if (high_pressure_start_point[cover_class] < 0 + && (curr_reg_pressure[cover_class] + > ira_available_class_regs[cover_class])) + high_pressure_start_point[cover_class] = curr_point; + if (curr_bb_node->reg_pressure[cover_class] + < curr_reg_pressure[cover_class]) + curr_bb_node->reg_pressure[cover_class] = curr_reg_pressure[cover_class]; +} + +/* Mark allocno A as currently not living and update current register + pressure, start point of the register pressure excess, and register + pressure excess length for living allocnos. 
*/ +static void +clear_allocno_live (ira_allocno_t a) +{ + unsigned int i; + enum reg_class cover_class; + + if (sparseset_bit_p (allocnos_live, ALLOCNO_NUM (a))) + { + cover_class = ALLOCNO_COVER_CLASS (a); + curr_reg_pressure[cover_class] + -= ira_reg_class_nregs[cover_class][ALLOCNO_MODE (a)]; + ira_assert (curr_reg_pressure[cover_class] >= 0); + if (high_pressure_start_point[cover_class] >= 0 + && (curr_reg_pressure[cover_class] + <= ira_available_class_regs[cover_class])) + { + EXECUTE_IF_SET_IN_SPARSESET (allocnos_live, i) + { + update_allocno_pressure_excess_length (ira_allocnos[i]); + } + high_pressure_start_point[cover_class] = -1; + } + } + sparseset_clear_bit (allocnos_live, ALLOCNO_NUM (a)); +} + +/* Record all regs that are set in any one insn. Communication from + mark_reg_{store,clobber}. */ +static VEC(rtx, heap) *regs_set; + +/* Handle the case where REG is set by the insn being scanned, during + the scan to build live ranges and calculate reg pressure info. + Store a 1 in hard_regs_live or allocnos_live for this register or + the corresponding allocno, record how many consecutive hardware + registers it actually needs. + + Note that even if REG does not remain alive after this insn, we + must mark it here as live, to ensure a conflict between REG and any + other reg allocnos set in this insn that really do live. This is + because those other allocnos could be considered after this. + + REG might actually be something other than a register; if so, we do + nothing. + + SETTER is 0 if this register was modified by an auto-increment + (i.e., a REG_INC note was found for it). */ +static void +mark_reg_store (rtx reg, const_rtx setter ATTRIBUTE_UNUSED, + void *data ATTRIBUTE_UNUSED) +{ + int regno; + + if (GET_CODE (reg) == SUBREG) + reg = SUBREG_REG (reg); + + if (! 
REG_P (reg)) + return; + + VEC_safe_push (rtx, heap, regs_set, reg); + + regno = REGNO (reg); + + if (regno >= FIRST_PSEUDO_REGISTER) + { + ira_allocno_t a = ira_curr_regno_allocno_map[regno]; + + if (a != NULL) + { + if (sparseset_bit_p (allocnos_live, ALLOCNO_NUM (a))) + return; + set_allocno_live (a); + } + make_regno_born (regno); + } + else if (! TEST_HARD_REG_BIT (ira_no_alloc_regs, regno)) + { + int last = regno + hard_regno_nregs[regno][GET_MODE (reg)]; + enum reg_class cover_class; + + while (regno < last) + { + if (! TEST_HARD_REG_BIT (hard_regs_live, regno) + && ! TEST_HARD_REG_BIT (eliminable_regset, regno)) + { + cover_class = ira_class_translate[REGNO_REG_CLASS (regno)]; + if (cover_class != NO_REGS) + { + curr_reg_pressure[cover_class]++; + if (high_pressure_start_point[cover_class] < 0 + && (curr_reg_pressure[cover_class] + > ira_available_class_regs[cover_class])) + high_pressure_start_point[cover_class] = curr_point; + } + make_regno_born (regno); + if (cover_class != NO_REGS + && (curr_bb_node->reg_pressure[cover_class] + < curr_reg_pressure[cover_class])) + curr_bb_node->reg_pressure[cover_class] + = curr_reg_pressure[cover_class]; + } + regno++; + } + } +} + +/* Like mark_reg_store except notice just CLOBBERs; ignore SETs. */ +static void +mark_reg_clobber (rtx reg, const_rtx setter, void *data) +{ + if (GET_CODE (setter) == CLOBBER) + mark_reg_store (reg, setter, data); +} + +/* Record that hard register REG (if it is a hard register) has + conflicts with all the allocno currently live or the corresponding + allocno lives at just the current program point. Do not mark REG + (or the allocno) itself as live. */ +static void +mark_reg_conflicts (rtx reg) +{ + int regno; + + if (GET_CODE (reg) == SUBREG) + reg = SUBREG_REG (reg); + + if (! REG_P (reg)) + return; + + regno = REGNO (reg); + + if (regno >= FIRST_PSEUDO_REGISTER) + make_regno_born_and_dead (regno); + else if (! 
TEST_HARD_REG_BIT (ira_no_alloc_regs, regno)) + { + int last = regno + hard_regno_nregs[regno][GET_MODE (reg)]; + + while (regno < last) + { + make_regno_born_and_dead (regno); + regno++; + } + } +} + +/* Mark REG (or the corresponding allocno) as being dead (following + the insn being scanned now). Store a 0 in hard_regs_live or + allocnos_live for the register. */ +static void +mark_reg_death (rtx reg) +{ + unsigned int i; + int regno = REGNO (reg); + + if (regno >= FIRST_PSEUDO_REGISTER) + { + ira_allocno_t a = ira_curr_regno_allocno_map[regno]; + + if (a != NULL) + { + if (! sparseset_bit_p (allocnos_live, ALLOCNO_NUM (a))) + return; + clear_allocno_live (a); + } + make_regno_dead (regno); + } + else if (! TEST_HARD_REG_BIT (ira_no_alloc_regs, regno)) + { + int last = regno + hard_regno_nregs[regno][GET_MODE (reg)]; + enum reg_class cover_class; + + while (regno < last) + { + if (TEST_HARD_REG_BIT (hard_regs_live, regno)) + { + cover_class = ira_class_translate[REGNO_REG_CLASS (regno)]; + if (cover_class != NO_REGS) + { + curr_reg_pressure[cover_class]--; + if (high_pressure_start_point[cover_class] >= 0 + && (curr_reg_pressure[cover_class] + <= ira_available_class_regs[cover_class])) + { + EXECUTE_IF_SET_IN_SPARSESET (allocnos_live, i) + { + update_allocno_pressure_excess_length + (ira_allocnos[i]); + } + high_pressure_start_point[cover_class] = -1; + } + ira_assert (curr_reg_pressure[cover_class] >= 0); + } + make_regno_dead (regno); + } + regno++; + } + } +} + +/* Checks that CONSTRAINTS permits to use only one hard register. If + it is so, the function returns the class of the hard register. + Otherwise it returns NO_REGS. 
*/ +static enum reg_class +single_reg_class (const char *constraints, rtx op, rtx equiv_const) +{ + int ignore_p; + enum reg_class cl, next_cl; + int c; + + cl = NO_REGS; + for (ignore_p = false; + (c = *constraints); + constraints += CONSTRAINT_LEN (c, constraints)) + if (c == '#') + ignore_p = true; + else if (c == ',') + ignore_p = false; + else if (! ignore_p) + switch (c) + { + case ' ': + case '\t': + case '=': + case '+': + case '*': + case '&': + case '%': + case '!': + case '?': + break; + case 'i': + if (CONSTANT_P (op) + || (equiv_const != NULL_RTX && CONSTANT_P (equiv_const))) + return NO_REGS; + break; + + case 'n': + if (GET_CODE (op) == CONST_INT + || (GET_CODE (op) == CONST_DOUBLE && GET_MODE (op) == VOIDmode) + || (equiv_const != NULL_RTX + && (GET_CODE (equiv_const) == CONST_INT + || (GET_CODE (equiv_const) == CONST_DOUBLE + && GET_MODE (equiv_const) == VOIDmode)))) + return NO_REGS; + break; + + case 's': + if ((CONSTANT_P (op) && GET_CODE (op) != CONST_INT + && (GET_CODE (op) != CONST_DOUBLE || GET_MODE (op) != VOIDmode)) + || (equiv_const != NULL_RTX + && CONSTANT_P (equiv_const) + && GET_CODE (equiv_const) != CONST_INT + && (GET_CODE (equiv_const) != CONST_DOUBLE + || GET_MODE (equiv_const) != VOIDmode))) + return NO_REGS; + break; + + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + if ((GET_CODE (op) == CONST_INT + && CONST_OK_FOR_CONSTRAINT_P (INTVAL (op), c, constraints)) + || (equiv_const != NULL_RTX + && GET_CODE (equiv_const) == CONST_INT + && CONST_OK_FOR_CONSTRAINT_P (INTVAL (equiv_const), + c, constraints))) + return NO_REGS; + break; + + case 'E': + case 'F': + if (GET_CODE (op) == CONST_DOUBLE + || (GET_CODE (op) == CONST_VECTOR + && GET_MODE_CLASS (GET_MODE (op)) == MODE_VECTOR_FLOAT) + || (equiv_const != NULL_RTX + && (GET_CODE (equiv_const) == CONST_DOUBLE + || (GET_CODE (equiv_const) == CONST_VECTOR + && (GET_MODE_CLASS (GET_MODE (equiv_const)) + == MODE_VECTOR_FLOAT))))) + return 
NO_REGS; + break; + + case 'G': + case 'H': + if ((GET_CODE (op) == CONST_DOUBLE + && CONST_DOUBLE_OK_FOR_CONSTRAINT_P (op, c, constraints)) + || (equiv_const != NULL_RTX + && GET_CODE (equiv_const) == CONST_DOUBLE + && CONST_DOUBLE_OK_FOR_CONSTRAINT_P (equiv_const, + c, constraints))) + return NO_REGS; + /* ??? what about memory */ + case 'r': + case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': + case 'h': case 'j': case 'k': case 'l': + case 'q': case 't': case 'u': + case 'v': case 'w': case 'x': case 'y': case 'z': + case 'A': case 'B': case 'C': case 'D': + case 'Q': case 'R': case 'S': case 'T': case 'U': + case 'W': case 'Y': case 'Z': + next_cl = (c == 'r' + ? GENERAL_REGS + : REG_CLASS_FROM_CONSTRAINT (c, constraints)); + if ((cl != NO_REGS && next_cl != cl) + || ira_available_class_regs[next_cl] > 1) + return NO_REGS; + cl = next_cl; + break; + + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + next_cl + = single_reg_class (recog_data.constraints[c - '0'], + recog_data.operand[c - '0'], NULL_RTX); + if ((cl != NO_REGS && next_cl != cl) || next_cl == NO_REGS + || ira_available_class_regs[next_cl] > 1) + return NO_REGS; + cl = next_cl; + break; + + default: + return NO_REGS; + } + return cl; +} + +/* The function checks that operand OP_NUM of the current insn can use + only one hard register. If it is so, the function returns the + class of the hard register. Otherwise it returns NO_REGS. */ +static enum reg_class +single_reg_operand_class (int op_num) +{ + if (op_num < 0 || recog_data.n_alternatives == 0) + return NO_REGS; + return single_reg_class (recog_data.constraints[op_num], + recog_data.operand[op_num], NULL_RTX); +} + +/* Processes input operands, if IN_P, or output operands otherwise of + the current insn with FREQ to find allocno which can use only one + hard register and makes other currently living allocnos conflicting + with the hard register. 
*/
static void
process_single_reg_class_operands (bool in_p, int freq)
{
  int i, regno, cost;
  unsigned int px;
  enum reg_class cl, cover_class;
  rtx operand;
  ira_allocno_t operand_a, a;

  for (i = 0; i < recog_data.n_operands; i++)
    {
      operand = recog_data.operand[i];
      /* Filter operands by direction: inputs (and inouts) when IN_P,
	 outputs (and inouts) otherwise.  */
      if (in_p && recog_data.operand_type[i] != OP_IN
	  && recog_data.operand_type[i] != OP_INOUT)
	continue;
      if (! in_p && recog_data.operand_type[i] != OP_OUT
	  && recog_data.operand_type[i] != OP_INOUT)
	continue;
      cl = single_reg_operand_class (i);
      if (cl == NO_REGS)
	continue;

      operand_a = NULL;

      if (GET_CODE (operand) == SUBREG)
	operand = SUBREG_REG (operand);

      if (REG_P (operand)
	  && (regno = REGNO (operand)) >= FIRST_PSEUDO_REGISTER)
	{
	  enum machine_mode mode;
	  enum reg_class cover_class;

	  operand_a = ira_curr_regno_allocno_map[regno];
	  mode = ALLOCNO_MODE (operand_a);
	  cover_class = ALLOCNO_COVER_CLASS (operand_a);
	  /* Prefer the forced hard register (the first allocatable
	     register of CL) for the operand's allocno by decreasing
	     its conflict cost, provided CL is usable within the
	     allocno's cover class.  */
	  if (ira_class_subset_p[cl][cover_class]
	      && ira_class_hard_regs_num[cl] != 0
	      && (ira_class_hard_reg_index[cover_class]
		  [ira_class_hard_regs[cl][0]]) >= 0
	      && reg_class_size[cl] <= (unsigned) CLASS_MAX_NREGS (cl, mode))
	    {
	      /* ??? FREQ */
	      cost = freq * (in_p
			     ? ira_register_move_cost[mode][cover_class][cl]
			     : ira_register_move_cost[mode][cl][cover_class]);
	      ira_allocate_and_set_costs
		(&ALLOCNO_CONFLICT_HARD_REG_COSTS (operand_a), cover_class, 0);
	      ALLOCNO_CONFLICT_HARD_REG_COSTS (operand_a)
		[ira_class_hard_reg_index
		 [cover_class][ira_class_hard_regs[cl][0]]]
		-= cost;
	    }
	}

      /* Make every other currently-live allocno conflict with all of
	 CL's hard registers, so the forced register stays free for
	 this operand.  */
      EXECUTE_IF_SET_IN_SPARSESET (allocnos_live, px)
	{
	  a = ira_allocnos[px];
	  cover_class = ALLOCNO_COVER_CLASS (a);
	  if (a != operand_a)
	    {
	      /* We could increase costs of A instead of making it
		 conflicting with the hard register.  But it works
		 worse because it will be spilled in reload anyway.  */
	      IOR_HARD_REG_SET (ALLOCNO_CONFLICT_HARD_REGS (a),
				reg_class_contents[cl]);
	      IOR_HARD_REG_SET (ALLOCNO_TOTAL_CONFLICT_HARD_REGS (a),
				reg_class_contents[cl]);
	    }
	}
    }
}

/* Process insns of the basic block given by its LOOP_TREE_NODE to
   update allocno live ranges, allocno hard register conflicts,
   intersected calls, and register pressure info for allocnos for the
   basic block for and regions containing the basic block.  */
static void
process_bb_node_lives (ira_loop_tree_node_t loop_tree_node)
{
  int i;
  unsigned int j;
  basic_block bb;
  rtx insn;
  edge e;
  edge_iterator ei;
  bitmap_iterator bi;
  bitmap reg_live_in;
  unsigned int px;

  bb = loop_tree_node->bb;
  if (bb != NULL)
    {
      /* Reset per-cover-class pressure bookkeeping for this block.  */
      for (i = 0; i < ira_reg_class_cover_size; i++)
	{
	  curr_reg_pressure[ira_reg_class_cover[i]] = 0;
	  high_pressure_start_point[ira_reg_class_cover[i]] = -1;
	}
      curr_bb_node = loop_tree_node;
      reg_live_in = DF_LR_IN (bb);
      sparseset_clear (allocnos_live);
      /* Seed the live hard-register set from DF live-in, minus
	 eliminable and non-allocatable registers.  */
      REG_SET_TO_HARD_REG_SET (hard_regs_live, reg_live_in);
      AND_COMPL_HARD_REG_SET (hard_regs_live, eliminable_regset);
      AND_COMPL_HARD_REG_SET (hard_regs_live, ira_no_alloc_regs);
      for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
	if (TEST_HARD_REG_BIT (hard_regs_live, i))
	  {
	    enum reg_class cover_class;

	    cover_class = REGNO_REG_CLASS (i);
	    if (cover_class == NO_REGS)
	      continue;
	    cover_class = ira_class_translate[cover_class];
	    curr_reg_pressure[cover_class]++;
	    if (curr_bb_node->reg_pressure[cover_class]
		< curr_reg_pressure[cover_class])
	      curr_bb_node->reg_pressure[cover_class]
		= curr_reg_pressure[cover_class];
	    ira_assert (curr_reg_pressure[cover_class]
			<= ira_available_class_regs[cover_class]);
	  }
      /* Mark live-in pseudos' allocnos as born at the block start.  */
      EXECUTE_IF_SET_IN_BITMAP (reg_live_in, FIRST_PSEUDO_REGISTER, j, bi)
	{
	  ira_allocno_t a = ira_curr_regno_allocno_map[j];

	  if (a == NULL)
	    continue;
	  ira_assert (! sparseset_bit_p (allocnos_live, ALLOCNO_NUM (a)));
	  set_allocno_live (a);
	  make_regno_born (j);
	}

#ifdef EH_RETURN_DATA_REGNO
      /* The EH-return data registers are implicitly live at the start
	 of a block reached from an EH edge.  */
      if (bb_has_eh_pred (bb))
	{
	  for (j = 0; ; ++j)
	    {
	      unsigned int regno = EH_RETURN_DATA_REGNO (j);

	      if (regno == INVALID_REGNUM)
		break;
	      make_regno_born_and_dead (regno);
	    }
	}
#endif

      /* Allocnos can't go in stack regs at the start of a basic block
	 that is reached by an abnormal edge.  Likewise for call
	 clobbered regs, because caller-save, fixup_abnormal_edges and
	 possibly the table driven EH machinery are not quite ready to
	 handle such allocnos live across such edges.  */
      FOR_EACH_EDGE (e, ei, bb->preds)
	if (e->flags & EDGE_ABNORMAL)
	  break;

      /* E is non-NULL here exactly when an abnormal predecessor edge
	 was found (FOR_EACH_EDGE leaves E NULL on normal exit).  */
      if (e != NULL)
	{
#ifdef STACK_REGS
	  EXECUTE_IF_SET_IN_SPARSESET (allocnos_live, px)
	    {
	      ALLOCNO_NO_STACK_REG_P (ira_allocnos[px]) = true;
	      ALLOCNO_TOTAL_NO_STACK_REG_P (ira_allocnos[px]) = true;
	    }
	  for (px = FIRST_STACK_REG; px <= LAST_STACK_REG; px++)
	    make_regno_born_and_dead (px);
#endif
	  /* No need to record conflicts for call clobbered regs if we
	     have nonlocal labels around, as we don't ever try to
	     allocate such regs in this case.  */
	  if (!cfun->has_nonlocal_label)
	    for (px = 0; px < FIRST_PSEUDO_REGISTER; px++)
	      if (call_used_regs[px])
		make_regno_born_and_dead (px);
	}

      /* Scan the code of this basic block, noting which allocnos and
	 hard regs are born or die.  Each insn contributes two program
	 points: one after clobbers/deaths, one after the stores.  */
      FOR_BB_INSNS (bb, insn)
	{
	  rtx link;
	  int freq;

	  if (! INSN_P (insn))
	    continue;

	  freq = REG_FREQ_FROM_BB (BLOCK_FOR_INSN (insn));
	  if (freq == 0)
	    freq = 1;

	  if (internal_flag_ira_verbose > 2 && ira_dump_file != NULL)
	    fprintf (ira_dump_file, "   Insn %u(l%d): point = %d\n",
		     INSN_UID (insn), loop_tree_node->parent->loop->num,
		     curr_point);

	  /* Check regs_set is an empty set.  */
	  gcc_assert (VEC_empty (rtx, regs_set));

	  /* Mark any allocnos clobbered by INSN as live, so they
	     conflict with the inputs.  */
	  note_stores (PATTERN (insn), mark_reg_clobber, NULL);

	  extract_insn (insn);
	  process_single_reg_class_operands (true, freq);

	  /* Mark any allocnos dead after INSN as dead now.  */
	  for (link = REG_NOTES (insn); link; link = XEXP (link, 1))
	    if (REG_NOTE_KIND (link) == REG_DEAD)
	      mark_reg_death (XEXP (link, 0));

	  curr_point++;

	  if (CALL_P (insn))
	    {
	      EXECUTE_IF_SET_IN_SPARSESET (allocnos_live, i)
		{
		  ira_allocno_t a = ira_allocnos[i];

		  ALLOCNO_CALL_FREQ (a) += freq;
		  ALLOCNO_CALLS_CROSSED_NUM (a)++;
		  /* Don't allocate allocnos that cross calls, if this
		     function receives a nonlocal goto.  */
		  if (cfun->has_nonlocal_label)
		    {
		      SET_HARD_REG_SET (ALLOCNO_CONFLICT_HARD_REGS (a));
		      SET_HARD_REG_SET (ALLOCNO_TOTAL_CONFLICT_HARD_REGS (a));
		    }
		}
	    }

	  /* Mark any allocnos set in INSN as live.  Clobbers are
	     processed again, so they will conflict with the reg
	     allocnos that are set.  */
	  note_stores (PATTERN (insn), mark_reg_store, NULL);

#ifdef AUTO_INC_DEC
	  for (link = REG_NOTES (insn); link; link = XEXP (link, 1))
	    if (REG_NOTE_KIND (link) == REG_INC)
	      mark_reg_store (XEXP (link, 0), NULL_RTX, NULL);
#endif

	  /* If INSN has multiple outputs, then any allocno that dies
	     here and is used inside of an output must conflict with
	     the other outputs.

	     It is unsafe to use !single_set here since it will ignore
	     an unused output.  Just because an output is unused does
	     not mean the compiler can assume the side effect will not
	     occur.  Consider if ALLOCNO appears in the address of an
	     output and we reload the output.  If we allocate ALLOCNO
	     to the same hard register as an unused output we could
	     set the hard register before the output reload insn.  */
	  if (GET_CODE (PATTERN (insn)) == PARALLEL && multiple_sets (insn))
	    for (link = REG_NOTES (insn); link; link = XEXP (link, 1))
	      if (REG_NOTE_KIND (link) == REG_DEAD)
		{
		  int i;
		  int used_in_output = 0;
		  rtx reg = XEXP (link, 0);

		  for (i = XVECLEN (PATTERN (insn), 0) - 1; i >= 0; i--)
		    {
		      rtx set = XVECEXP (PATTERN (insn), 0, i);

		      if (GET_CODE (set) == SET
			  && ! REG_P (SET_DEST (set))
			  && ! rtx_equal_p (reg, SET_DEST (set))
			  && reg_overlap_mentioned_p (reg, SET_DEST (set)))
			used_in_output = 1;
		    }
		  if (used_in_output)
		    mark_reg_conflicts (reg);
		}

	  process_single_reg_class_operands (false, freq);

	  /* Mark any allocnos set in INSN and then never used.  */
	  while (! VEC_empty (rtx, regs_set))
	    {
	      rtx reg = VEC_pop (rtx, regs_set);
	      rtx note = find_regno_note (insn, REG_UNUSED, REGNO (reg));

	      if (note)
		mark_reg_death (XEXP (note, 0));
	    }
	  curr_point++;
	}
      /* Everything still live at the block end dies here.  */
      EXECUTE_IF_SET_IN_SPARSESET (allocnos_live, i)
	{
	  make_regno_dead (ALLOCNO_REGNO (ira_allocnos[i]));
	}

      curr_point++;

    }
  /* Propagate register pressure to upper loop tree nodes: */
  if (loop_tree_node != ira_loop_tree_root)
    for (i = 0; i < ira_reg_class_cover_size; i++)
      {
	enum reg_class cover_class;

	cover_class = ira_reg_class_cover[i];
	if (loop_tree_node->reg_pressure[cover_class]
	    > loop_tree_node->parent->reg_pressure[cover_class])
	  loop_tree_node->parent->reg_pressure[cover_class]
	    = loop_tree_node->reg_pressure[cover_class];
      }
}

/* Create and set up IRA_START_POINT_RANGES and
   IRA_FINISH_POINT_RANGES.
*/ +static void +create_start_finish_chains (void) +{ + ira_allocno_t a; + ira_allocno_iterator ai; + allocno_live_range_t r; + + ira_start_point_ranges + = (allocno_live_range_t *) ira_allocate (ira_max_point + * sizeof (allocno_live_range_t)); + memset (ira_start_point_ranges, 0, + ira_max_point * sizeof (allocno_live_range_t)); + ira_finish_point_ranges + = (allocno_live_range_t *) ira_allocate (ira_max_point + * sizeof (allocno_live_range_t)); + memset (ira_finish_point_ranges, 0, + ira_max_point * sizeof (allocno_live_range_t)); + FOR_EACH_ALLOCNO (a, ai) + { + for (r = ALLOCNO_LIVE_RANGES (a); r != NULL; r = r->next) + { + r->start_next = ira_start_point_ranges[r->start]; + ira_start_point_ranges[r->start] = r; + r->finish_next = ira_finish_point_ranges[r->finish]; + ira_finish_point_ranges[r->finish] = r; + } + } +} + +/* Rebuild IRA_START_POINT_RANGES and IRA_FINISH_POINT_RANGES after + new live ranges and program points were added as a result if new + insn generation. */ +void +ira_rebuild_start_finish_chains (void) +{ + ira_free (ira_finish_point_ranges); + ira_free (ira_start_point_ranges); + create_start_finish_chains (); +} + +/* Print live ranges R to file F. */ +void +ira_print_live_range_list (FILE *f, allocno_live_range_t r) +{ + for (; r != NULL; r = r->next) + fprintf (f, " [%d..%d]", r->start, r->finish); + fprintf (f, "\n"); +} + +/* Print live ranges R to stderr. */ +void +ira_debug_live_range_list (allocno_live_range_t r) +{ + ira_print_live_range_list (stderr, r); +} + +/* Print live ranges of allocno A to file F. */ +static void +print_allocno_live_ranges (FILE *f, ira_allocno_t a) +{ + fprintf (f, " a%d(r%d):", ALLOCNO_NUM (a), ALLOCNO_REGNO (a)); + ira_print_live_range_list (f, ALLOCNO_LIVE_RANGES (a)); +} + +/* Print live ranges of allocno A to stderr. */ +void +ira_debug_allocno_live_ranges (ira_allocno_t a) +{ + print_allocno_live_ranges (stderr, a); +} + +/* Print live ranges of all allocnos to file F. 
*/ +static void +print_live_ranges (FILE *f) +{ + ira_allocno_t a; + ira_allocno_iterator ai; + + FOR_EACH_ALLOCNO (a, ai) + print_allocno_live_ranges (f, a); +} + +/* Print live ranges of all allocnos to stderr. */ +void +ira_debug_live_ranges (void) +{ + print_live_ranges (stderr); +} + +/* The main entry function creates live ranges, set up + CONFLICT_HARD_REGS and TOTAL_CONFLICT_HARD_REGS for allocnos, and + calculate register pressure info. */ +void +ira_create_allocno_live_ranges (void) +{ + allocnos_live = sparseset_alloc (ira_allocnos_num); + /* Make a vector that mark_reg_{store,clobber} will store in. */ + if (!regs_set) + regs_set = VEC_alloc (rtx, heap, 10); + curr_point = 0; + ira_traverse_loop_tree (true, ira_loop_tree_root, NULL, + process_bb_node_lives); + ira_max_point = curr_point; + create_start_finish_chains (); + if (internal_flag_ira_verbose > 2 && ira_dump_file != NULL) + print_live_ranges (ira_dump_file); + /* Clean up. */ + sparseset_free (allocnos_live); +} + +/* Free arrays IRA_START_POINT_RANGES and IRA_FINISH_POINT_RANGES. */ +void +ira_finish_allocno_live_ranges (void) +{ + ira_free (ira_finish_point_ranges); + ira_free (ira_start_point_ranges); +} diff --git a/gcc/ira.c b/gcc/ira.c new file mode 100644 index 00000000000..c98f0a05e6f --- /dev/null +++ b/gcc/ira.c @@ -0,0 +1,2064 @@ +/* Integrated Register Allocator (IRA) entry point. + Copyright (C) 2006, 2007, 2008 + Free Software Foundation, Inc. + Contributed by Vladimir Makarov <vmakarov@redhat.com>. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. 
+ +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +/* The integrated register allocator (IRA) is a + regional register allocator performing graph coloring on a top-down + traversal of nested regions. Graph coloring in a region is based + on Chaitin-Briggs algorithm. It is called integrated because + register coalescing, register live range splitting, and choosing a + better hard register are done on-the-fly during coloring. Register + coalescing and choosing a cheaper hard register is done by hard + register preferencing during hard register assigning. The live + range splitting is a byproduct of the regional register allocation. + + Major IRA notions are: + + o *Region* is a part of CFG where graph coloring based on + Chaitin-Briggs algorithm is done. IRA can work on any set of + nested CFG regions forming a tree. Currently the regions are + the entire function for the root region and natural loops for + the other regions. Therefore data structure representing a + region is called loop_tree_node. + + o *Cover class* is a register class belonging to a set of + non-intersecting register classes containing all of the + hard-registers available for register allocation. The set of + all cover classes for a target is defined in the corresponding + machine-description file according some criteria. Such notion + is needed because Chaitin-Briggs algorithm works on + non-intersected register classes. + + o *Allocno* represents the live range of a pseudo-register in a + region. Besides the obvious attributes like the corresponding + pseudo-register number, cover class, conflicting allocnos and + conflicting hard-registers, there are a few allocno attributes + which are important for understanding the allocation algorithm: + + - *Live ranges*. This is a list of ranges of *program + points* where the allocno lives. 
Program points represent + places where a pseudo can be born or become dead (there are + approximately two times more program points than the insns) + and they are represented by integers starting with 0. The + live ranges are used to find conflicts between allocnos of + different cover classes. They also play very important role + for the transformation of the IRA internal representation of + several regions into a one region representation. The later is + used during the reload pass work because each allocno + represents all of the corresponding pseudo-registers. + + - *Hard-register costs*. This is a vector of size equal to the + number of available hard-registers of the allocno's cover + class. The cost of a callee-clobbered hard-register for an + allocno is increased by the cost of save/restore code around + the calls through the given allocno's life. If the allocno + is a move instruction operand and another operand is a + hard-register of the allocno's cover class, the cost of the + hard-register is decreased by the move cost. + + When an allocno is assigned, the hard-register with minimal + full cost is used. Initially, a hard-register's full cost is + the corresponding value from the hard-register's cost vector. + If the allocno is connected by a *copy* (see below) to + another allocno which has just received a hard-register, the + cost of the hard-register is decreased. Before choosing a + hard-register for an allocno, the allocno's current costs of + the hard-registers are modified by the conflict hard-register + costs of all of the conflicting allocnos which are not + assigned yet. + + - *Conflict hard-register costs*. This is a vector of the same + size as the hard-register costs vector. To permit an + unassigned allocno to get a better hard-register, IRA uses + this vector to calculate the final full cost of the + available hard-registers. 
Conflict hard-register costs of an + unassigned allocno are also changed with a change of the + hard-register cost of the allocno when a copy involving the + allocno is processed as described above. This is done to + show other unassigned allocnos that a given allocno prefers + some hard-registers in order to remove the move instruction + corresponding to the copy. + + o *Cap*. If a pseudo-register does not live in a region but + lives in a nested region, IRA creates a special allocno called + a cap in the outer region. A region cap is also created for a + subregion cap. + + o *Copy*. Allocnos can be connected by copies. Copies are used + to modify hard-register costs for allocnos during coloring. + Such modifications reflects a preference to use the same + hard-register for the allocnos connected by copies. Usually + copies are created for move insns (in this case it results in + register coalescing). But IRA also creates copies for operands + of an insn which should be assigned to the same hard-register + due to constraints in the machine description (it usually + results in removing a move generated in reload to satisfy + the constraints) and copies referring to the allocno which is + the output operand of an instruction and the allocno which is + an input operand dying in the instruction (creation of such + copies results in less register shuffling). IRA *does not* + create copies between the same register allocnos from different + regions because we use another technique for propagating + hard-register preference on the borders of regions. + + Allocnos (including caps) for the upper region in the region tree + *accumulate* information important for coloring from allocnos with + the same pseudo-register from nested regions. This includes + hard-register and memory costs, conflicts with hard-registers, + allocno conflicts, allocno copies and more. *Thus, attributes for + allocnos in a region have the same values as if the region had no + subregions*. 
It means that attributes for allocnos in the + outermost region corresponding to the function have the same values + as though the allocation used only one region which is the entire + function. It also means that we can look at IRA work as if the + first IRA did allocation for all function then it improved the + allocation for loops then their subloops and so on. + + IRA major passes are: + + o Building IRA internal representation which consists of the + following subpasses: + + * First, IRA builds regions and creates allocnos (file + ira-build.c) and initializes most of their attributes. + + * Then IRA finds a cover class for each allocno and calculates + its initial (non-accumulated) cost of memory and each + hard-register of its cover class (file ira-cost.c). + + * IRA creates live ranges of each allocno, calulates register + pressure for each cover class in each region, sets up + conflict hard registers for each allocno and info about calls + the allocno lives through (file ira-lives.c). + + * IRA removes low register pressure loops from the regions + mostly to speed IRA up (file ira-build.c). + + * IRA propagates accumulated allocno info from lower region + allocnos to corresponding upper region allocnos (file + ira-build.c). + + * IRA creates all caps (file ira-build.c). + + * Having live-ranges of allocnos and their cover classes, IRA + creates conflicting allocnos of the same cover class for each + allocno. Conflicting allocnos are stored as a bit vector or + array of pointers to the conflicting allocnos whatever is + more profitable (file ira-conflicts.c). At this point IRA + creates allocno copies. + + o Coloring. Now IRA has all necessary info to start graph coloring + process. It is done in each region on top-down traverse of the + region tree (file ira-color.c). There are following subpasses: + + * Optional aggressive coalescing of allocnos in the region. + + * Putting allocnos onto the coloring stack. 
IRA uses Briggs + optimistic coloring which is a major improvement over + Chaitin's coloring. Therefore IRA does not spill allocnos at + this point. There is some freedom in the order of putting + allocnos on the stack which can affect the final result of + the allocation. IRA uses some heuristics to improve the order. + + * Popping the allocnos from the stack and assigning them hard + registers. If IRA can not assign a hard register to an + allocno and the allocno is coalesced, IRA undoes the + coalescing and puts the uncoalesced allocnos onto the stack in + the hope that some such allocnos will get a hard register + separately. If IRA fails to assign hard register or memory + is more profitable for it, IRA spills the allocno. IRA + assigns the allocno the hard-register with minimal full + allocation cost which reflects the cost of usage of the + hard-register for the allocno and cost of usage of the + hard-register for allocnos conflicting with given allocno. + + * After allono assigning in the region, IRA modifies the hard + register and memory costs for the corresponding allocnos in + the subregions to reflect the cost of possible loads, stores, + or moves on the border of the region and its subregions. + When default regional allocation algorithm is used + (-fira-algorithm=mixed), IRA just propagates the assignment + for allocnos if the register pressure in the region for the + corresponding cover class is less than number of available + hard registers for given cover class. + + o Spill/restore code moving. When IRA performs an allocation + by traversing regions in top-down order, it does not know what + happens below in the region tree. Therefore, sometimes IRA + misses opportunities to perform a better allocation. A simple + optimization tries to improve allocation in a region having + subregions and containing in another region. If the + corresponding allocnos in the subregion are spilled, it spills + the region allocno if it is profitable. 
The optimization + implements a simple iterative algorithm performing profitable + transformations while they are still possible. It is fast in + practice, so there is no real need for a better time complexity + algorithm. + + o Code change. After coloring, two allocnos representing the same + pseudo-register outside and inside a region respectively may be + assigned to different locations (hard-registers or memory). In + this case IRA creates and uses a new pseudo-register inside the + region and adds code to move allocno values on the region's + borders. This is done during top-down traversal of the regions + (file ira-emit.c). In some complicated cases IRA can create a + new allocno to move allocno values (e.g. when a swap of values + stored in two hard-registers is needed). At this stage, the + new allocno is marked as spilled. IRA still creates the + pseudo-register and the moves on the region borders even when + both allocnos were assigned to the same hard-register. If the + reload pass spills a pseudo-register for some reason, the + effect will be smaller because another allocno will still be in + the hard-register. In most cases, this is better then spilling + both allocnos. If reload does not change the allocation + for the two pseudo-registers, the trivial move will be removed + by post-reload optimizations. IRA does not generate moves for + allocnos assigned to the same hard register when the default + regional allocation algorithm is used and the register pressure + in the region for the corresponding allocno cover class is less + than number of available hard registers for given cover class. + IRA also does some optimizations to remove redundant stores and + to reduce code duplication on the region borders. + + o Flattening internal representation. After changing code, IRA + transforms its internal representation for several regions into + one region representation (file ira-build.c). This process is + called IR flattening. 
Such process is more complicated than IR + rebuilding would be, but is much faster. + + o After IR flattening, IRA tries to assign hard registers to all + spilled allocnos. This is impelemented by a simple and fast + priority coloring algorithm (see function + ira_reassign_conflict_allocnos::ira-color.c). Here new allocnos + created during the code change pass can be assigned to hard + registers. + + o At the end IRA calls the reload pass. The reload pass + communicates with IRA through several functions in file + ira-color.c to improve its decisions in + + * sharing stack slots for the spilled pseudos based on IRA info + about pseudo-register conflicts. + + * reassigning hard-registers to all spilled pseudos at the end + of each reload iteration. + + * choosing a better hard-register to spill based on IRA info + about pseudo-register live ranges and the register pressure + in places where the pseudo-register lives. + + IRA uses a lot of data representing the target processors. These + data are initilized in file ira.c. + + If function has no loops (or the loops are ignored when + -fira-algorithm=CB is used), we have classic Chaitin-Briggs + coloring (only instead of separate pass of coalescing, we use hard + register preferencing). In such case, IRA works much faster + because many things are not made (like IR flattening, the + spill/restore optimization, and the code change). + + Literature is worth to read for better understanding the code: + + o Preston Briggs, Keith D. Cooper, Linda Torczon. Improvements to + Graph Coloring Register Allocation. + + o David Callahan, Brian Koblenz. Register allocation via + hierarchical graph coloring. + + o Keith Cooper, Anshuman Dasgupta, Jason Eckhardt. Revisiting Graph + Coloring Register Allocation: A Study of the Chaitin-Briggs and + Callahan-Koblenz Algorithms. + + o Guei-Yuan Lueh, Thomas Gross, and Ali-Reza Adl-Tabatabai. Global + Register Allocation Based on Graph Fusion. + + o Vladimir Makarov. 
The Integrated Register Allocator for GCC. + + o Vladimir Makarov. The top-down register allocator for irregular + register file architectures. + +*/ + + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "regs.h" +#include "rtl.h" +#include "tm_p.h" +#include "target.h" +#include "flags.h" +#include "obstack.h" +#include "bitmap.h" +#include "hard-reg-set.h" +#include "basic-block.h" +#include "expr.h" +#include "recog.h" +#include "params.h" +#include "timevar.h" +#include "tree-pass.h" +#include "output.h" +#include "reload.h" +#include "errors.h" +#include "integrate.h" +#include "df.h" +#include "ggc.h" +#include "ira-int.h" + + +/* A modified value of flag `-fira-verbose' used internally. */ +int internal_flag_ira_verbose; + +/* Dump file of the allocator if it is not NULL. */ +FILE *ira_dump_file; + +/* Pools for allocnos, copies, allocno live ranges. */ +alloc_pool allocno_pool, copy_pool, allocno_live_range_pool; + +/* The number of elements in the following array. */ +int ira_spilled_reg_stack_slots_num; + +/* The following array contains info about spilled pseudo-registers + stack slots used in current function so far. */ +struct ira_spilled_reg_stack_slot *ira_spilled_reg_stack_slots; + +/* Correspondingly overall cost of the allocation, cost of the + allocnos assigned to hard-registers, cost of the allocnos assigned + to memory, cost of loads, stores and register move insns generated + for pseudo-register live range splitting (see ira-emit.c). */ +int ira_overall_cost; +int ira_reg_cost, ira_mem_cost; +int ira_load_cost, ira_store_cost, ira_shuffle_cost; +int ira_move_loops_num, ira_additional_jumps_num; + +/* Map: hard regs X modes -> set of hard registers for storing value + of given mode starting with given hard register. 
*/ +HARD_REG_SET ira_reg_mode_hard_regset[FIRST_PSEUDO_REGISTER][NUM_MACHINE_MODES]; + +/* The following two variables are array analogs of the macros + MEMORY_MOVE_COST and REGISTER_MOVE_COST. */ +short int ira_memory_move_cost[MAX_MACHINE_MODE][N_REG_CLASSES][2]; +move_table *ira_register_move_cost[MAX_MACHINE_MODE]; + +/* Similar to may_move_in_cost but it is calculated in IRA instead of + regclass. Another difference is that we take only available hard + registers into account to figure out that one register class is a + subset of the another one. */ +move_table *ira_may_move_in_cost[MAX_MACHINE_MODE]; + +/* Similar to may_move_out_cost but it is calculated in IRA instead of + regclass. Another difference is that we take only available hard + registers into account to figure out that one register class is a + subset of the another one. */ +move_table *ira_may_move_out_cost[MAX_MACHINE_MODE]; + +/* Register class subset relation: TRUE if the first class is a subset + of the second one considering only hard registers available for the + allocation. */ +int ira_class_subset_p[N_REG_CLASSES][N_REG_CLASSES]; + +/* Temporary hard reg set used for a different calculation. */ +static HARD_REG_SET temp_hard_regset; + + + +/* The function sets up the map IRA_REG_MODE_HARD_REGSET. */ +static void +setup_reg_mode_hard_regset (void) +{ + int i, m, hard_regno; + + for (m = 0; m < NUM_MACHINE_MODES; m++) + for (hard_regno = 0; hard_regno < FIRST_PSEUDO_REGISTER; hard_regno++) + { + CLEAR_HARD_REG_SET (ira_reg_mode_hard_regset[hard_regno][m]); + for (i = hard_regno_nregs[hard_regno][m] - 1; i >= 0; i--) + if (hard_regno + i < FIRST_PSEUDO_REGISTER) + SET_HARD_REG_BIT (ira_reg_mode_hard_regset[hard_regno][m], + hard_regno + i); + } +} + + + +/* Hard registers that can not be used for the register allocator for + all functions of the current compilation unit. 
*/ +static HARD_REG_SET no_unit_alloc_regs; + +/* Array of the number of hard registers of given class which are + available for allocation. The order is defined by the + allocation order. */ +short ira_class_hard_regs[N_REG_CLASSES][FIRST_PSEUDO_REGISTER]; + +/* The number of elements of the above array for given register + class. */ +int ira_class_hard_regs_num[N_REG_CLASSES]; + +/* Index (in ira_class_hard_regs) for given register class and hard + register (in general case a hard register can belong to several + register classes). The index is negative for hard registers + unavailable for the allocation. */ +short ira_class_hard_reg_index[N_REG_CLASSES][FIRST_PSEUDO_REGISTER]; + +/* The function sets up the three arrays declared above. */ +static void +setup_class_hard_regs (void) +{ + int cl, i, hard_regno, n; + HARD_REG_SET processed_hard_reg_set; + + ira_assert (SHRT_MAX >= FIRST_PSEUDO_REGISTER); + /* We could call ORDER_REGS_FOR_LOCAL_ALLOC here (it is usually + putting hard callee-used hard registers first). But our + heuristics work better. */ + for (cl = (int) N_REG_CLASSES - 1; cl >= 0; cl--) + { + COPY_HARD_REG_SET (temp_hard_regset, reg_class_contents[cl]); + AND_COMPL_HARD_REG_SET (temp_hard_regset, no_unit_alloc_regs); + CLEAR_HARD_REG_SET (processed_hard_reg_set); + for (n = 0, i = 0; i < FIRST_PSEUDO_REGISTER; i++) + { +#ifdef REG_ALLOC_ORDER + hard_regno = reg_alloc_order[i]; +#else + hard_regno = i; +#endif + if (TEST_HARD_REG_BIT (processed_hard_reg_set, hard_regno)) + continue; + SET_HARD_REG_BIT (processed_hard_reg_set, hard_regno); + if (! TEST_HARD_REG_BIT (temp_hard_regset, hard_regno)) + ira_class_hard_reg_index[cl][hard_regno] = -1; + else + { + ira_class_hard_reg_index[cl][hard_regno] = n; + ira_class_hard_regs[cl][n++] = hard_regno; + } + } + ira_class_hard_regs_num[cl] = n; + } +} + +/* Number of given class hard registers available for the register + allocation for given classes. 
*/ +int ira_available_class_regs[N_REG_CLASSES]; + +/* Set up IRA_AVAILABLE_CLASS_REGS. */ +static void +setup_available_class_regs (void) +{ + int i, j; + + memset (ira_available_class_regs, 0, sizeof (ira_available_class_regs)); + for (i = 0; i < N_REG_CLASSES; i++) + { + COPY_HARD_REG_SET (temp_hard_regset, reg_class_contents[i]); + AND_COMPL_HARD_REG_SET (temp_hard_regset, no_unit_alloc_regs); + for (j = 0; j < FIRST_PSEUDO_REGISTER; j++) + if (TEST_HARD_REG_BIT (temp_hard_regset, j)) + ira_available_class_regs[i]++; + } +} + +/* Set up global variables defining info about hard registers for the + allocation. These depend on USE_HARD_FRAME_P whose TRUE value means + that we can use the hard frame pointer for the allocation. */ +static void +setup_alloc_regs (bool use_hard_frame_p) +{ + COPY_HARD_REG_SET (no_unit_alloc_regs, fixed_reg_set); + if (! use_hard_frame_p) + SET_HARD_REG_BIT (no_unit_alloc_regs, HARD_FRAME_POINTER_REGNUM); + setup_class_hard_regs (); + setup_available_class_regs (); +} + + + +/* Set up IRA_MEMORY_MOVE_COST, IRA_REGISTER_MOVE_COST. */ +static void +setup_class_subset_and_memory_move_costs (void) +{ + int cl, cl2; + enum machine_mode mode; + HARD_REG_SET temp_hard_regset2; + + for (mode = 0; mode < MAX_MACHINE_MODE; mode++) + ira_memory_move_cost[mode][NO_REGS][0] + = ira_memory_move_cost[mode][NO_REGS][1] = SHRT_MAX; + for (cl = (int) N_REG_CLASSES - 1; cl >= 0; cl--) + { + if (cl != (int) NO_REGS) + for (mode = 0; mode < MAX_MACHINE_MODE; mode++) + { + ira_memory_move_cost[mode][cl][0] = MEMORY_MOVE_COST (mode, cl, 0); + ira_memory_move_cost[mode][cl][1] = MEMORY_MOVE_COST (mode, cl, 1); + /* Costs for NO_REGS are used in cost calculation on the + 1st pass when the preferred register classes are not + known yet. In this case we take the best scenario. 
*/ + if (ira_memory_move_cost[mode][NO_REGS][0] + > ira_memory_move_cost[mode][cl][0]) + ira_memory_move_cost[mode][NO_REGS][0] + = ira_memory_move_cost[mode][cl][0]; + if (ira_memory_move_cost[mode][NO_REGS][1] + > ira_memory_move_cost[mode][cl][1]) + ira_memory_move_cost[mode][NO_REGS][1] + = ira_memory_move_cost[mode][cl][1]; + } + for (cl2 = (int) N_REG_CLASSES - 1; cl2 >= 0; cl2--) + { + COPY_HARD_REG_SET (temp_hard_regset, reg_class_contents[cl]); + AND_COMPL_HARD_REG_SET (temp_hard_regset, no_unit_alloc_regs); + COPY_HARD_REG_SET (temp_hard_regset2, reg_class_contents[cl2]); + AND_COMPL_HARD_REG_SET (temp_hard_regset2, no_unit_alloc_regs); + ira_class_subset_p[cl][cl2] + = hard_reg_set_subset_p (temp_hard_regset, temp_hard_regset2); + } + } +} + + + +/* Define the following macro if allocation through malloc if + preferable. */ +#define IRA_NO_OBSTACK + +#ifndef IRA_NO_OBSTACK +/* Obstack used for storing all dynamic data (except bitmaps) of the + IRA. */ +static struct obstack ira_obstack; +#endif + +/* Obstack used for storing all bitmaps of the IRA. */ +static struct bitmap_obstack ira_bitmap_obstack; + +/* Allocate memory of size LEN for IRA data. */ +void * +ira_allocate (size_t len) +{ + void *res; + +#ifndef IRA_NO_OBSTACK + res = obstack_alloc (&ira_obstack, len); +#else + res = xmalloc (len); +#endif + return res; +} + +/* Reallocate memory PTR of size LEN for IRA data. */ +void * +ira_reallocate (void *ptr, size_t len) +{ + void *res; + +#ifndef IRA_NO_OBSTACK + res = obstack_alloc (&ira_obstack, len); +#else + res = xrealloc (ptr, len); +#endif + return res; +} + +/* Free memory ADDR allocated for IRA data. */ +void +ira_free (void *addr ATTRIBUTE_UNUSED) +{ +#ifndef IRA_NO_OBSTACK + /* do nothing */ +#else + free (addr); +#endif +} + + +/* Allocate and returns bitmap for IRA. */ +bitmap +ira_allocate_bitmap (void) +{ + return BITMAP_ALLOC (&ira_bitmap_obstack); +} + +/* Free bitmap B allocated for IRA. 
*/ +void +ira_free_bitmap (bitmap b ATTRIBUTE_UNUSED) +{ + /* do nothing */ +} + + + +/* Output information about allocation of all allocnos (except for + caps) into file F. */ +void +ira_print_disposition (FILE *f) +{ + int i, n, max_regno; + ira_allocno_t a; + basic_block bb; + + fprintf (f, "Disposition:"); + max_regno = max_reg_num (); + for (n = 0, i = FIRST_PSEUDO_REGISTER; i < max_regno; i++) + for (a = ira_regno_allocno_map[i]; + a != NULL; + a = ALLOCNO_NEXT_REGNO_ALLOCNO (a)) + { + if (n % 4 == 0) + fprintf (f, "\n"); + n++; + fprintf (f, " %4d:r%-4d", ALLOCNO_NUM (a), ALLOCNO_REGNO (a)); + if ((bb = ALLOCNO_LOOP_TREE_NODE (a)->bb) != NULL) + fprintf (f, "b%-3d", bb->index); + else + fprintf (f, "l%-3d", ALLOCNO_LOOP_TREE_NODE (a)->loop->num); + if (ALLOCNO_HARD_REGNO (a) >= 0) + fprintf (f, " %3d", ALLOCNO_HARD_REGNO (a)); + else + fprintf (f, " mem"); + } + fprintf (f, "\n"); +} + +/* Outputs information about allocation of all allocnos into + stderr. */ +void +ira_debug_disposition (void) +{ + ira_print_disposition (stderr); +} + + + +/* For each reg class, table listing all the classes contained in it + (excluding the class itself. Non-allocatable registers are + excluded from the consideration). */ +static enum reg_class alloc_reg_class_subclasses[N_REG_CLASSES][N_REG_CLASSES]; + +/* Initialize the table of subclasses of each reg class. 
*/ +static void +setup_reg_subclasses (void) +{ + int i, j; + HARD_REG_SET temp_hard_regset2; + + for (i = 0; i < N_REG_CLASSES; i++) + for (j = 0; j < N_REG_CLASSES; j++) + alloc_reg_class_subclasses[i][j] = LIM_REG_CLASSES; + + for (i = 0; i < N_REG_CLASSES; i++) + { + if (i == (int) NO_REGS) + continue; + + COPY_HARD_REG_SET (temp_hard_regset, reg_class_contents[i]); + AND_COMPL_HARD_REG_SET (temp_hard_regset, no_unit_alloc_regs); + if (hard_reg_set_equal_p (temp_hard_regset, ira_zero_hard_reg_set)) + continue; + for (j = 0; j < N_REG_CLASSES; j++) + if (i != j) + { + enum reg_class *p; + + COPY_HARD_REG_SET (temp_hard_regset2, reg_class_contents[j]); + AND_COMPL_HARD_REG_SET (temp_hard_regset2, no_unit_alloc_regs); + if (! hard_reg_set_subset_p (temp_hard_regset, + temp_hard_regset2)) + continue; + p = &alloc_reg_class_subclasses[j][0]; + while (*p != LIM_REG_CLASSES) p++; + *p = (enum reg_class) i; + } + } +} + + + +/* Number of cover classes. Cover classes is non-intersected register + classes containing all hard-registers available for the + allocation. */ +int ira_reg_class_cover_size; + +/* The array containing cover classes (see also comments for macro + IRA_COVER_CLASSES). Only first IRA_REG_CLASS_COVER_SIZE elements are + used for this. */ +enum reg_class ira_reg_class_cover[N_REG_CLASSES]; + +/* The number of elements in the subsequent array. */ +int ira_important_classes_num; + +/* The array containing non-empty classes (including non-empty cover + classes) which are subclasses of cover classes. Such classes is + important for calculation of the hard register usage costs. */ +enum reg_class ira_important_classes[N_REG_CLASSES]; + +/* The array containing indexes of important classes in the previous + array. The array elements are defined only for important + classes. */ +int ira_important_class_nums[N_REG_CLASSES]; + +#ifdef IRA_COVER_CLASSES + +/* Check IRA_COVER_CLASSES and sets the four global variables defined + above. 
*/ +static void +setup_cover_and_important_classes (void) +{ + int i, j; + enum reg_class cl; + static enum reg_class classes[] = IRA_COVER_CLASSES; + HARD_REG_SET temp_hard_regset2; + + ira_reg_class_cover_size = 0; + for (i = 0; (cl = classes[i]) != LIM_REG_CLASSES; i++) + { + for (j = 0; j < i; j++) + if (reg_classes_intersect_p (cl, classes[j])) + gcc_unreachable (); + COPY_HARD_REG_SET (temp_hard_regset, reg_class_contents[cl]); + AND_COMPL_HARD_REG_SET (temp_hard_regset, no_unit_alloc_regs); + if (! hard_reg_set_equal_p (temp_hard_regset, ira_zero_hard_reg_set)) + ira_reg_class_cover[ira_reg_class_cover_size++] = cl; + } + ira_important_classes_num = 0; + for (cl = 0; cl < N_REG_CLASSES; cl++) + { + COPY_HARD_REG_SET (temp_hard_regset, reg_class_contents[cl]); + AND_COMPL_HARD_REG_SET (temp_hard_regset, no_unit_alloc_regs); + if (! hard_reg_set_equal_p (temp_hard_regset, ira_zero_hard_reg_set)) + for (j = 0; j < ira_reg_class_cover_size; j++) + { + COPY_HARD_REG_SET (temp_hard_regset, reg_class_contents[cl]); + AND_COMPL_HARD_REG_SET (temp_hard_regset, no_unit_alloc_regs); + COPY_HARD_REG_SET (temp_hard_regset2, + reg_class_contents[ira_reg_class_cover[j]]); + AND_COMPL_HARD_REG_SET (temp_hard_regset2, no_unit_alloc_regs); + if (cl == ira_reg_class_cover[j] + || (hard_reg_set_subset_p (temp_hard_regset, temp_hard_regset2) + && ! hard_reg_set_equal_p (temp_hard_regset, + temp_hard_regset2))) + { + ira_important_class_nums[cl] = ira_important_classes_num; + ira_important_classes[ira_important_classes_num++] = cl; + } + } + } +} +#endif + +/* Map of all register classes to corresponding cover class containing + the given class. If given class is not a subset of a cover class, + we translate it into the cheapest cover class. */ +enum reg_class ira_class_translate[N_REG_CLASSES]; + +#ifdef IRA_COVER_CLASSES + +/* Set up array IRA_CLASS_TRANSLATE. 
*/ +static void +setup_class_translate (void) +{ + enum reg_class cl, cover_class, best_class, *cl_ptr; + enum machine_mode mode; + int i, cost, min_cost, best_cost; + + for (cl = 0; cl < N_REG_CLASSES; cl++) + ira_class_translate[cl] = NO_REGS; + for (i = 0; i < ira_reg_class_cover_size; i++) + { + cover_class = ira_reg_class_cover[i]; + for (cl_ptr = &alloc_reg_class_subclasses[cover_class][0]; + (cl = *cl_ptr) != LIM_REG_CLASSES; + cl_ptr++) + { + if (ira_class_translate[cl] == NO_REGS) + ira_class_translate[cl] = cover_class; +#ifdef ENABLE_IRA_CHECKING + else + { + COPY_HARD_REG_SET (temp_hard_regset, reg_class_contents[cl]); + AND_COMPL_HARD_REG_SET (temp_hard_regset, no_unit_alloc_regs); + if (! hard_reg_set_subset_p (temp_hard_regset, + ira_zero_hard_reg_set)) + gcc_unreachable (); + } +#endif + } + ira_class_translate[cover_class] = cover_class; + } + /* For classes which are not fully covered by a cover class (in + other words covered by more one cover class), use the cheapest + cover class. */ + for (cl = 0; cl < N_REG_CLASSES; cl++) + { + if (cl == NO_REGS || ira_class_translate[cl] != NO_REGS) + continue; + best_class = NO_REGS; + best_cost = INT_MAX; + for (i = 0; i < ira_reg_class_cover_size; i++) + { + cover_class = ira_reg_class_cover[i]; + COPY_HARD_REG_SET (temp_hard_regset, + reg_class_contents[cover_class]); + AND_HARD_REG_SET (temp_hard_regset, reg_class_contents[cl]); + AND_COMPL_HARD_REG_SET (temp_hard_regset, no_unit_alloc_regs); + if (! 
hard_reg_set_equal_p (temp_hard_regset, ira_zero_hard_reg_set)) + { + min_cost = INT_MAX; + for (mode = 0; mode < MAX_MACHINE_MODE; mode++) + { + cost = (ira_memory_move_cost[mode][cl][0] + + ira_memory_move_cost[mode][cl][1]); + if (min_cost > cost) + min_cost = cost; + } + if (best_class == NO_REGS || best_cost > min_cost) + { + best_class = cover_class; + best_cost = min_cost; + } + } + } + ira_class_translate[cl] = best_class; + } +} +#endif + +/* The biggest important reg_class inside of intersection of the two + reg_classes (that is calculated taking only hard registers + available for allocation into account). If the both reg_classes + contain no hard registers available for allocation, the value is + calculated by taking all hard-registers including fixed ones into + account. */ +enum reg_class ira_reg_class_intersect[N_REG_CLASSES][N_REG_CLASSES]; + +/* The biggest important reg_class inside of union of the two + reg_classes (that is calculated taking only hard registers + available for allocation into account). If the both reg_classes + contain no hard registers available for allocation, the value is + calculated by taking all hard-registers including fixed ones into + account. In other words, the value is the corresponding + reg_class_subunion value. */ +enum reg_class ira_reg_class_union[N_REG_CLASSES][N_REG_CLASSES]; + +#ifdef IRA_COVER_CLASSES + +/* Set up IRA_REG_CLASS_INTERSECT and IRA_REG_CLASS_UNION. 
*/ +static void +setup_reg_class_intersect_union (void) +{ + int i, cl1, cl2, cl3; + HARD_REG_SET intersection_set, union_set, temp_set2; + + for (cl1 = 0; cl1 < N_REG_CLASSES; cl1++) + { + for (cl2 = 0; cl2 < N_REG_CLASSES; cl2++) + { + ira_reg_class_intersect[cl1][cl2] = NO_REGS; + COPY_HARD_REG_SET (temp_hard_regset, reg_class_contents[cl1]); + AND_COMPL_HARD_REG_SET (temp_hard_regset, no_unit_alloc_regs); + COPY_HARD_REG_SET (temp_set2, reg_class_contents[cl2]); + AND_COMPL_HARD_REG_SET (temp_set2, no_unit_alloc_regs); + if (hard_reg_set_equal_p (temp_hard_regset, ira_zero_hard_reg_set) + && hard_reg_set_equal_p (temp_set2, ira_zero_hard_reg_set)) + { + for (i = 0;; i++) + { + cl3 = reg_class_subclasses[cl1][i]; + if (cl3 == LIM_REG_CLASSES) + break; + if (reg_class_subset_p (ira_reg_class_intersect[cl1][cl2], + cl3)) + ira_reg_class_intersect[cl1][cl2] = cl3; + } + ira_reg_class_union[cl1][cl2] = reg_class_subunion[cl1][cl2]; + continue; + } + ira_reg_class_union[cl1][cl2] = NO_REGS; + COPY_HARD_REG_SET (intersection_set, reg_class_contents[cl1]); + AND_HARD_REG_SET (intersection_set, reg_class_contents[cl2]); + AND_COMPL_HARD_REG_SET (intersection_set, no_unit_alloc_regs); + COPY_HARD_REG_SET (union_set, reg_class_contents[cl1]); + IOR_HARD_REG_SET (union_set, reg_class_contents[cl2]); + AND_COMPL_HARD_REG_SET (union_set, no_unit_alloc_regs); + for (i = 0; i < ira_important_classes_num; i++) + { + cl3 = ira_important_classes[i]; + COPY_HARD_REG_SET (temp_hard_regset, reg_class_contents[cl3]); + AND_COMPL_HARD_REG_SET (temp_hard_regset, no_unit_alloc_regs); + if (hard_reg_set_subset_p (temp_hard_regset, intersection_set)) + { + COPY_HARD_REG_SET + (temp_set2, + reg_class_contents[(int) + ira_reg_class_intersect[cl1][cl2]]); + AND_COMPL_HARD_REG_SET (temp_set2, no_unit_alloc_regs); + if (! hard_reg_set_subset_p (temp_hard_regset, temp_set2) + /* Ignore unavailable hard registers and prefer + smallest class for debugging purposes. 
*/ + || (hard_reg_set_equal_p (temp_hard_regset, temp_set2) + && hard_reg_set_subset_p + (reg_class_contents[cl3], + reg_class_contents + [(int) ira_reg_class_intersect[cl1][cl2]]))) + ira_reg_class_intersect[cl1][cl2] = (enum reg_class) cl3; + } + if (hard_reg_set_subset_p (temp_hard_regset, union_set)) + { + COPY_HARD_REG_SET + (temp_set2, + reg_class_contents[(int) ira_reg_class_union[cl1][cl2]]); + AND_COMPL_HARD_REG_SET (temp_set2, no_unit_alloc_regs); + if (ira_reg_class_union[cl1][cl2] == NO_REGS + || (hard_reg_set_subset_p (temp_set2, temp_hard_regset) + + && (! hard_reg_set_equal_p (temp_set2, + temp_hard_regset) + /* Ignore unavailable hard registers and + prefer smallest class for debugging + purposes. */ + || hard_reg_set_subset_p + (reg_class_contents[cl3], + reg_class_contents + [(int) ira_reg_class_union[cl1][cl2]])))) + ira_reg_class_union[cl1][cl2] = (enum reg_class) cl3; + } + } + } + } +} + +#endif + +/* Output all cover classes and the translation map into file F. */ +static void +print_class_cover (FILE *f) +{ + static const char *const reg_class_names[] = REG_CLASS_NAMES; + int i; + + fprintf (f, "Class cover:\n"); + for (i = 0; i < ira_reg_class_cover_size; i++) + fprintf (f, " %s", reg_class_names[ira_reg_class_cover[i]]); + fprintf (f, "\nClass translation:\n"); + for (i = 0; i < N_REG_CLASSES; i++) + fprintf (f, " %s -> %s\n", reg_class_names[i], + reg_class_names[ira_class_translate[i]]); +} + +/* Output all cover classes and the translation map into + stderr. */ +void +ira_debug_class_cover (void) +{ + print_class_cover (stderr); +} + +/* Set up different arrays concerning class subsets, cover and + important classes. 
*/ +static void +find_reg_class_closure (void) +{ + setup_reg_subclasses (); +#ifdef IRA_COVER_CLASSES + setup_cover_and_important_classes (); + setup_class_translate (); + setup_reg_class_intersect_union (); +#endif +} + + + +/* Map: register class x machine mode -> number of hard registers of + given class needed to store value of given mode. If the number is + different, the size will be negative. */ +int ira_reg_class_nregs[N_REG_CLASSES][MAX_MACHINE_MODE]; + +/* Maximal value of the previous array elements. */ +int ira_max_nregs; + +/* Form IRA_REG_CLASS_NREGS map. */ +static void +setup_reg_class_nregs (void) +{ + int m; + enum reg_class cl; + + ira_max_nregs = -1; + for (cl = 0; cl < N_REG_CLASSES; cl++) + for (m = 0; m < MAX_MACHINE_MODE; m++) + { + ira_reg_class_nregs[cl][m] = CLASS_MAX_NREGS (cl, m); + if (ira_max_nregs < ira_reg_class_nregs[cl][m]) + ira_max_nregs = ira_reg_class_nregs[cl][m]; + } +} + + + +/* Array whose values are hard regset of hard registers available for + the allocation of given register class whose HARD_REGNO_MODE_OK + values for given mode are zero. */ +HARD_REG_SET prohibited_class_mode_regs[N_REG_CLASSES][NUM_MACHINE_MODES]; + +/* Set up PROHIBITED_CLASS_MODE_REGS. */ +static void +setup_prohibited_class_mode_regs (void) +{ + int i, j, k, hard_regno; + enum reg_class cl; + + for (i = 0; i < ira_reg_class_cover_size; i++) + { + cl = ira_reg_class_cover[i]; + for (j = 0; j < NUM_MACHINE_MODES; j++) + { + CLEAR_HARD_REG_SET (prohibited_class_mode_regs[cl][j]); + for (k = ira_class_hard_regs_num[cl] - 1; k >= 0; k--) + { + hard_regno = ira_class_hard_regs[cl][k]; + if (! HARD_REGNO_MODE_OK (hard_regno, j)) + SET_HARD_REG_BIT (prohibited_class_mode_regs[cl][j], + hard_regno); + } + } + } +} + + + +/* Allocate and initialize IRA_REGISTER_MOVE_COST, + IRA_MAY_MOVE_IN_COST, and IRA_MAY_MOVE_OUT_COST for MODE if it is + not done yet. 
*/ +void +ira_init_register_move_cost (enum machine_mode mode) +{ + int cl1, cl2; + + ira_assert (ira_register_move_cost[mode] == NULL + && ira_may_move_in_cost[mode] == NULL + && ira_may_move_out_cost[mode] == NULL); + if (move_cost[mode] == NULL) + init_move_cost (mode); + ira_register_move_cost[mode] = move_cost[mode]; + /* Don't use ira_allocate because the tables exist out of scope of a + IRA call. */ + ira_may_move_in_cost[mode] + = (move_table *) xmalloc (sizeof (move_table) * N_REG_CLASSES); + memcpy (ira_may_move_in_cost[mode], may_move_in_cost[mode], + sizeof (move_table) * N_REG_CLASSES); + ira_may_move_out_cost[mode] + = (move_table *) xmalloc (sizeof (move_table) * N_REG_CLASSES); + memcpy (ira_may_move_out_cost[mode], may_move_out_cost[mode], + sizeof (move_table) * N_REG_CLASSES); + for (cl1 = 0; cl1 < N_REG_CLASSES; cl1++) + { + for (cl2 = 0; cl2 < N_REG_CLASSES; cl2++) + { + if (ira_class_subset_p[cl1][cl2]) + ira_may_move_in_cost[mode][cl1][cl2] = 0; + if (ira_class_subset_p[cl2][cl1]) + ira_may_move_out_cost[mode][cl1][cl2] = 0; + } + } +} + + + +/* Hard regsets whose all bits are correspondingly zero or one. */ +HARD_REG_SET ira_zero_hard_reg_set; +HARD_REG_SET ira_one_hard_reg_set; + +/* This is called once during compiler work. It sets up + different arrays whose values don't depend on the compiled + function. */ +void +ira_init_once (void) +{ + enum machine_mode mode; + + CLEAR_HARD_REG_SET (ira_zero_hard_reg_set); + SET_HARD_REG_SET (ira_one_hard_reg_set); + for (mode = 0; mode < MAX_MACHINE_MODE; mode++) + { + ira_register_move_cost[mode] = NULL; + ira_may_move_in_cost[mode] = NULL; + ira_may_move_out_cost[mode] = NULL; + } + ira_init_costs_once (); +} + +/* Free ira_register_move_cost, ira_may_move_in_cost, and + ira_may_move_out_cost for each mode. 
*/ +static void +free_register_move_costs (void) +{ + enum machine_mode mode; + + for (mode = 0; mode < MAX_MACHINE_MODE; mode++) + { + if (ira_may_move_in_cost[mode] != NULL) + free (ira_may_move_in_cost[mode]); + if (ira_may_move_out_cost[mode] != NULL) + free (ira_may_move_out_cost[mode]); + ira_register_move_cost[mode] = NULL; + ira_may_move_in_cost[mode] = NULL; + ira_may_move_out_cost[mode] = NULL; + } +} + +/* This is called every time when register related information is + changed. */ +void +ira_init (void) +{ + free_register_move_costs (); + setup_reg_mode_hard_regset (); + setup_alloc_regs (flag_omit_frame_pointer != 0); + setup_class_subset_and_memory_move_costs (); + find_reg_class_closure (); + setup_reg_class_nregs (); + setup_prohibited_class_mode_regs (); + ira_init_costs (); +} + +/* Function called once at the end of compiler work. */ +void +ira_finish_once (void) +{ + ira_finish_costs_once (); + free_register_move_costs (); +} + + + +/* Array whose values are hard regset of hard registers for which + move of the hard register in given mode into itself is + prohibited. */ +HARD_REG_SET ira_prohibited_mode_move_regs[NUM_MACHINE_MODES]; + +/* Flag of that the above array has been initialized. */ +static bool ira_prohibited_mode_move_regs_initialized_p = false; + +/* Set up IRA_PROHIBITED_MODE_MOVE_REGS. */ +static void +setup_prohibited_mode_move_regs (void) +{ + int i, j; + rtx test_reg1, test_reg2, move_pat, move_insn; + + if (ira_prohibited_mode_move_regs_initialized_p) + return; + ira_prohibited_mode_move_regs_initialized_p = true; + test_reg1 = gen_rtx_REG (VOIDmode, 0); + test_reg2 = gen_rtx_REG (VOIDmode, 0); + move_pat = gen_rtx_SET (VOIDmode, test_reg1, test_reg2); + move_insn = gen_rtx_INSN (VOIDmode, 0, 0, 0, 0, 0, move_pat, -1, 0); + for (i = 0; i < NUM_MACHINE_MODES; i++) + { + SET_HARD_REG_SET (ira_prohibited_mode_move_regs[i]); + for (j = 0; j < FIRST_PSEUDO_REGISTER; j++) + { + if (! 
HARD_REGNO_MODE_OK (j, i)) + continue; + SET_REGNO (test_reg1, j); + PUT_MODE (test_reg1, i); + SET_REGNO (test_reg2, j); + PUT_MODE (test_reg2, i); + INSN_CODE (move_insn) = -1; + recog_memoized (move_insn); + if (INSN_CODE (move_insn) < 0) + continue; + extract_insn (move_insn); + if (! constrain_operands (1)) + continue; + CLEAR_HARD_REG_BIT (ira_prohibited_mode_move_regs[i], j); + } + } +} + + + +/* Function specific hard registers that can not be used for the + register allocation. */ +HARD_REG_SET ira_no_alloc_regs; + +/* Return TRUE if *LOC contains an asm. */ +static int +insn_contains_asm_1 (rtx *loc, void *data ATTRIBUTE_UNUSED) +{ + if ( !*loc) + return FALSE; + if (GET_CODE (*loc) == ASM_OPERANDS) + return TRUE; + return FALSE; +} + + +/* Return TRUE if INSN contains an ASM. */ +static bool +insn_contains_asm (rtx insn) +{ + return for_each_rtx (&insn, insn_contains_asm_1, NULL); +} + +/* Set up regs_asm_clobbered. */ +static void +compute_regs_asm_clobbered (char *regs_asm_clobbered) +{ + basic_block bb; + + memset (regs_asm_clobbered, 0, sizeof (char) * FIRST_PSEUDO_REGISTER); + + FOR_EACH_BB (bb) + { + rtx insn; + FOR_BB_INSNS_REVERSE (bb, insn) + { + struct df_ref **def_rec; + + if (insn_contains_asm (insn)) + for (def_rec = DF_INSN_DEFS (insn); *def_rec; def_rec++) + { + struct df_ref *def = *def_rec; + unsigned int dregno = DF_REF_REGNO (def); + if (dregno < FIRST_PSEUDO_REGISTER) + { + unsigned int i; + enum machine_mode mode = GET_MODE (DF_REF_REAL_REG (def)); + unsigned int end = dregno + + hard_regno_nregs[dregno][mode] - 1; + + for (i = dregno; i <= end; ++i) + regs_asm_clobbered[i] = 1; + } + } + } + } +} + + +/* Set up ELIMINABLE_REGSET, IRA_NO_ALLOC_REGS, and REGS_EVER_LIVE. */ +static void +setup_eliminable_regset (void) +{ + int i; + /* Like regs_ever_live, but 1 if a reg is set or clobbered from an + asm. 
Unlike regs_ever_live, elements of this array corresponding + to eliminable regs (like the frame pointer) are set if an asm + sets them. */ + char *regs_asm_clobbered + = (char *) alloca (FIRST_PSEUDO_REGISTER * sizeof (char)); +#ifdef ELIMINABLE_REGS + static const struct {const int from, to; } eliminables[] = ELIMINABLE_REGS; +#endif + /* FIXME: If EXIT_IGNORE_STACK is set, we will not save and restore + sp for alloca. So we can't eliminate the frame pointer in that + case. At some point, we should improve this by emitting the + sp-adjusting insns for this case. */ + int need_fp + = (! flag_omit_frame_pointer + || (cfun->calls_alloca && EXIT_IGNORE_STACK) + || crtl->accesses_prior_frames + || crtl->stack_realign_needed + || FRAME_POINTER_REQUIRED); + + frame_pointer_needed = need_fp; + + COPY_HARD_REG_SET (ira_no_alloc_regs, no_unit_alloc_regs); + CLEAR_HARD_REG_SET (eliminable_regset); + + compute_regs_asm_clobbered (regs_asm_clobbered); + /* Build the regset of all eliminable registers and show we can't + use those that we already know won't be eliminated. */ +#ifdef ELIMINABLE_REGS + for (i = 0; i < (int) ARRAY_SIZE (eliminables); i++) + { + bool cannot_elim + = (! CAN_ELIMINATE (eliminables[i].from, eliminables[i].to) + || (eliminables[i].to == STACK_POINTER_REGNUM && need_fp)); + + if (! regs_asm_clobbered[eliminables[i].from]) + { + SET_HARD_REG_BIT (eliminable_regset, eliminables[i].from); + + if (cannot_elim) + SET_HARD_REG_BIT (ira_no_alloc_regs, eliminables[i].from); + } + else if (cannot_elim) + error ("%s cannot be used in asm here", + reg_names[eliminables[i].from]); + else + df_set_regs_ever_live (eliminables[i].from, true); + } +#if FRAME_POINTER_REGNUM != HARD_FRAME_POINTER_REGNUM + if (! 
regs_asm_clobbered[HARD_FRAME_POINTER_REGNUM]) + { + SET_HARD_REG_BIT (eliminable_regset, HARD_FRAME_POINTER_REGNUM); + if (need_fp) + SET_HARD_REG_BIT (ira_no_alloc_regs, HARD_FRAME_POINTER_REGNUM); + } + else if (need_fp) + error ("%s cannot be used in asm here", + reg_names[HARD_FRAME_POINTER_REGNUM]); + else + df_set_regs_ever_live (HARD_FRAME_POINTER_REGNUM, true); +#endif + +#else + if (! regs_asm_clobbered[FRAME_POINTER_REGNUM]) + { + SET_HARD_REG_BIT (eliminable_regset, FRAME_POINTER_REGNUM); + if (need_fp) + SET_HARD_REG_BIT (ira_no_alloc_regs, FRAME_POINTER_REGNUM); + } + else if (need_fp) + error ("%s cannot be used in asm here", reg_names[FRAME_POINTER_REGNUM]); + else + df_set_regs_ever_live (FRAME_POINTER_REGNUM, true); +#endif +} + + + +/* The length of the following two arrays. */ +int ira_reg_equiv_len; + +/* The element value is TRUE if the corresponding regno value is + invariant. */ +bool *ira_reg_equiv_invariant_p; + +/* The element value is equiv constant of given pseudo-register or + NULL_RTX. */ +rtx *ira_reg_equiv_const; + +/* Set up the two arrays declared above. */ +static void +find_reg_equiv_invariant_const (void) +{ + int i; + bool invariant_p; + rtx list, insn, note, constant, x; + + for (i = FIRST_PSEUDO_REGISTER; i < reg_equiv_init_size; i++) + { + constant = NULL_RTX; + invariant_p = false; + for (list = reg_equiv_init[i]; list != NULL_RTX; list = XEXP (list, 1)) + { + insn = XEXP (list, 0); + note = find_reg_note (insn, REG_EQUIV, NULL_RTX); + + if (note == NULL_RTX) + continue; + + x = XEXP (note, 0); + + if (! function_invariant_p (x) + || ! flag_pic + /* A function invariant is often CONSTANT_P but may + include a register. We promise to only pass CONSTANT_P + objects to LEGITIMATE_PIC_OPERAND_P. */ + || (CONSTANT_P (x) && LEGITIMATE_PIC_OPERAND_P (x))) + { + /* It can happen that a REG_EQUIV note contains a MEM + that is not a legitimate memory operand. 
As later + stages of the reload assume that all addresses found + in the reg_equiv_* arrays were originally legitimate, + we ignore such REG_EQUIV notes. */ + if (memory_operand (x, VOIDmode)) + invariant_p = MEM_READONLY_P (x); + else if (function_invariant_p (x)) + { + if (GET_CODE (x) == PLUS + || x == frame_pointer_rtx || x == arg_pointer_rtx) + invariant_p = true; + else + constant = x; + } + } + } + ira_reg_equiv_invariant_p[i] = invariant_p; + ira_reg_equiv_const[i] = constant; + } +} + + + +/* Set up REG_RENUMBER and CALLER_SAVE_NEEDED (used by reload) from + the allocation found by IRA. */ +static void +setup_reg_renumber (void) +{ + int regno, hard_regno; + ira_allocno_t a; + ira_allocno_iterator ai; + + caller_save_needed = 0; + FOR_EACH_ALLOCNO (a, ai) + { + /* There are no caps at this point. */ + ira_assert (ALLOCNO_CAP_MEMBER (a) == NULL); + if (! ALLOCNO_ASSIGNED_P (a)) + /* It can happen if A is not referenced but partially anticipated + somewhere in a region. */ + ALLOCNO_ASSIGNED_P (a) = true; + ira_free_allocno_updated_costs (a); + hard_regno = ALLOCNO_HARD_REGNO (a); + regno = (int) REGNO (ALLOCNO_REG (a)); + reg_renumber[regno] = (hard_regno < 0 ? -1 : hard_regno); + if (hard_regno >= 0 && ALLOCNO_CALLS_CROSSED_NUM (a) != 0 + && ! ira_hard_reg_not_in_set_p (hard_regno, ALLOCNO_MODE (a), + call_used_reg_set)) + { + ira_assert (!optimize || flag_caller_saves + || regno >= ira_reg_equiv_len + || ira_reg_equiv_const[regno] + || ira_reg_equiv_invariant_p[regno]); + caller_save_needed = 1; + } + } +} + +/* Set up allocno assignment flags for further allocation + improvements. */ +static void +setup_allocno_assignment_flags (void) +{ + int hard_regno; + ira_allocno_t a; + ira_allocno_iterator ai; + + FOR_EACH_ALLOCNO (a, ai) + { + if (! ALLOCNO_ASSIGNED_P (a)) + /* It can happen if A is not referenced but partially anticipated + somewhere in a region. 
*/ + ira_free_allocno_updated_costs (a); + hard_regno = ALLOCNO_HARD_REGNO (a); + /* Don't assign hard registers to allocnos which are destination + of removed store at the end of loop. It has no sense to keep + the same value in different hard registers. It is also + impossible to assign hard registers correctly to such + allocnos because the cost info and info about intersected + calls are incorrect for them. */ + ALLOCNO_ASSIGNED_P (a) = (hard_regno >= 0 + || ALLOCNO_MEM_OPTIMIZED_DEST_P (a) + || (ALLOCNO_MEMORY_COST (a) + - ALLOCNO_COVER_CLASS_COST (a)) < 0); + ira_assert (hard_regno < 0 + || ! ira_hard_reg_not_in_set_p (hard_regno, ALLOCNO_MODE (a), + reg_class_contents + [ALLOCNO_COVER_CLASS (a)])); + } +} + +/* Evaluate overall allocation cost and the costs for using hard + registers and memory for allocnos. */ +static void +calculate_allocation_cost (void) +{ + int hard_regno, cost; + ira_allocno_t a; + ira_allocno_iterator ai; + + ira_overall_cost = ira_reg_cost = ira_mem_cost = 0; + FOR_EACH_ALLOCNO (a, ai) + { + hard_regno = ALLOCNO_HARD_REGNO (a); + ira_assert (hard_regno < 0 + || ! 
ira_hard_reg_not_in_set_p + (hard_regno, ALLOCNO_MODE (a), + reg_class_contents[ALLOCNO_COVER_CLASS (a)])); + if (hard_regno < 0) + { + cost = ALLOCNO_MEMORY_COST (a); + ira_mem_cost += cost; + } + else if (ALLOCNO_HARD_REG_COSTS (a) != NULL) + { + cost = (ALLOCNO_HARD_REG_COSTS (a) + [ira_class_hard_reg_index + [ALLOCNO_COVER_CLASS (a)][hard_regno]]); + ira_reg_cost += cost; + } + else + { + cost = ALLOCNO_COVER_CLASS_COST (a); + ira_reg_cost += cost; + } + ira_overall_cost += cost; + } + + if (internal_flag_ira_verbose > 0 && ira_dump_file != NULL) + { + fprintf (ira_dump_file, + "+++Costs: overall %d, reg %d, mem %d, ld %d, st %d, move %d\n", + ira_overall_cost, ira_reg_cost, ira_mem_cost, + ira_load_cost, ira_store_cost, ira_shuffle_cost); + fprintf (ira_dump_file, "+++ move loops %d, new jumps %d\n", + ira_move_loops_num, ira_additional_jumps_num); + } + +} + +#ifdef ENABLE_IRA_CHECKING +/* Check the correctness of the allocation. We do need this because + of complicated code to transform more one region internal + representation into one region representation. 
*/ +static void +check_allocation (void) +{ + ira_allocno_t a, conflict_a; + int hard_regno, conflict_hard_regno, nregs, conflict_nregs; + ira_allocno_conflict_iterator aci; + ira_allocno_iterator ai; + + FOR_EACH_ALLOCNO (a, ai) + { + if (ALLOCNO_CAP_MEMBER (a) != NULL + || (hard_regno = ALLOCNO_HARD_REGNO (a)) < 0) + continue; + nregs = hard_regno_nregs[hard_regno][ALLOCNO_MODE (a)]; + FOR_EACH_ALLOCNO_CONFLICT (a, conflict_a, aci) + if ((conflict_hard_regno = ALLOCNO_HARD_REGNO (conflict_a)) >= 0) + { + conflict_nregs + = (hard_regno_nregs + [conflict_hard_regno][ALLOCNO_MODE (conflict_a)]); + if ((conflict_hard_regno <= hard_regno + && hard_regno < conflict_hard_regno + conflict_nregs) + || (hard_regno <= conflict_hard_regno + && conflict_hard_regno < hard_regno + nregs)) + { + fprintf (stderr, "bad allocation for %d and %d\n", + ALLOCNO_REGNO (a), ALLOCNO_REGNO (conflict_a)); + gcc_unreachable (); + } + } + } +} +#endif + +/* Fix values of array REG_EQUIV_INIT after live range splitting done + by IRA. 
*/ +static void +fix_reg_equiv_init (void) +{ + int max_regno = max_reg_num (); + int i, new_regno; + rtx x, prev, next, insn, set; + + if (reg_equiv_init_size < max_regno) + { + reg_equiv_init + = (rtx *) ggc_realloc (reg_equiv_init, max_regno * sizeof (rtx)); + while (reg_equiv_init_size < max_regno) + reg_equiv_init[reg_equiv_init_size++] = NULL_RTX; + for (i = FIRST_PSEUDO_REGISTER; i < reg_equiv_init_size; i++) + for (prev = NULL_RTX, x = reg_equiv_init[i]; x != NULL_RTX; x = next) + { + next = XEXP (x, 1); + insn = XEXP (x, 0); + set = single_set (insn); + ira_assert (set != NULL_RTX + && (REG_P (SET_DEST (set)) || REG_P (SET_SRC (set)))); + if (REG_P (SET_DEST (set)) + && ((int) REGNO (SET_DEST (set)) == i + || (int) ORIGINAL_REGNO (SET_DEST (set)) == i)) + new_regno = REGNO (SET_DEST (set)); + else if (REG_P (SET_SRC (set)) + && ((int) REGNO (SET_SRC (set)) == i + || (int) ORIGINAL_REGNO (SET_SRC (set)) == i)) + new_regno = REGNO (SET_SRC (set)); + else + gcc_unreachable (); + if (new_regno == i) + prev = x; + else + { + if (prev == NULL_RTX) + reg_equiv_init[i] = next; + else + XEXP (prev, 1) = next; + XEXP (x, 1) = reg_equiv_init[new_regno]; + reg_equiv_init[new_regno] = x; + } + } + } +} + +#ifdef ENABLE_IRA_CHECKING +/* Print redundant memory-memory copies. */ +static void +print_redundant_copies (void) +{ + int hard_regno; + ira_allocno_t a; + ira_copy_t cp, next_cp; + ira_allocno_iterator ai; + + FOR_EACH_ALLOCNO (a, ai) + { + if (ALLOCNO_CAP_MEMBER (a) != NULL) + /* It is a cap. 
*/ + continue; + hard_regno = ALLOCNO_HARD_REGNO (a); + if (hard_regno >= 0) + continue; + for (cp = ALLOCNO_COPIES (a); cp != NULL; cp = next_cp) + if (cp->first == a) + next_cp = cp->next_first_allocno_copy; + else + { + next_cp = cp->next_second_allocno_copy; + if (internal_flag_ira_verbose > 4 && ira_dump_file != NULL + && cp->insn != NULL_RTX + && ALLOCNO_HARD_REGNO (cp->first) == hard_regno) + fprintf (ira_dump_file, + " Redundant move from %d(freq %d):%d\n", + INSN_UID (cp->insn), cp->freq, hard_regno); + } + } +} +#endif + +/* Setup preferred and alternative classes for new pseudo-registers + created by IRA starting with START. */ +static void +setup_preferred_alternate_classes_for_new_pseudos (int start) +{ + int i, old_regno; + int max_regno = max_reg_num (); + + for (i = start; i < max_regno; i++) + { + old_regno = ORIGINAL_REGNO (regno_reg_rtx[i]); + ira_assert (i != old_regno); + setup_reg_classes (i, reg_preferred_class (old_regno), + reg_alternate_class (old_regno)); + if (internal_flag_ira_verbose > 2 && ira_dump_file != NULL) + fprintf (ira_dump_file, + " New r%d: setting preferred %s, alternative %s\n", + i, reg_class_names[reg_preferred_class (old_regno)], + reg_class_names[reg_alternate_class (old_regno)]); + } +} + + + +/* Regional allocation can create new pseudo-registers. This function + expands some arrays for pseudo-registers. */ +static void +expand_reg_info (int old_size) +{ + int i; + int size = max_reg_num (); + + resize_reg_info (); + for (i = old_size; i < size; i++) + { + reg_renumber[i] = -1; + setup_reg_classes (i, GENERAL_REGS, ALL_REGS); + } +} + + + +/* This page contains code for sorting the insn chain used by reload. + In the old register allocator, the insn chain order corresponds to + the order of insns in RTL. By putting insns with higher execution + frequency BBs first, reload has a better chance to generate less + expensive operand reloads for such insns. */ + +/* Map bb index -> order number in the BB chain in RTL code. 
*/ +static int *basic_block_order_nums; + +/* Map chain insn uid -> order number in the insn chain before sorting + the insn chain. */ +static int *chain_insn_order; + +/* The function is used to sort insn chain according insn execution + frequencies. */ +static int +chain_freq_compare (const void *v1p, const void *v2p) +{ + const struct insn_chain *c1 = *(struct insn_chain * const *)v1p; + const struct insn_chain *c2 = *(struct insn_chain * const *)v2p; + int diff; + + diff = (BASIC_BLOCK (c2->block)->frequency + - BASIC_BLOCK (c1->block)->frequency); + if (diff) + return diff; + /* Keep the same order in BB scope. */ + return (chain_insn_order[INSN_UID(c1->insn)] + - chain_insn_order[INSN_UID(c2->insn)]); +} + +/* Sort the insn chain according insn original order. */ +static int +chain_bb_compare (const void *v1p, const void *v2p) +{ + const struct insn_chain *c1 = *(struct insn_chain * const *)v1p; + const struct insn_chain *c2 = *(struct insn_chain * const *)v2p; + int diff; + + diff = (basic_block_order_nums[c1->block] + - basic_block_order_nums[c2->block]); + if (diff) + return diff; + /* Keep the same order in BB scope. */ + return (chain_insn_order[INSN_UID(c1->insn)] + - chain_insn_order[INSN_UID(c2->insn)]); +} + +/* Sort the insn chain according to insn frequencies if + FREQ_P or according to insn original order otherwise. 
*/ +void +ira_sort_insn_chain (bool freq_p) +{ + struct insn_chain *chain, **chain_arr; + basic_block bb; + int i, n; + + chain_insn_order = (int *) ira_allocate (get_max_uid () * sizeof (int)); + for (n = 0, chain = reload_insn_chain; chain != 0; chain = chain->next) + { + chain_insn_order[INSN_UID (chain->insn)] = n; + n++; + } + if (n <= 1) + return; + chain_arr + = (struct insn_chain **) ira_allocate (n * sizeof (struct insn_chain *)); + basic_block_order_nums + = (int *) ira_allocate (sizeof (int) * last_basic_block); + n = 0; + FOR_EACH_BB (bb) + { + basic_block_order_nums[bb->index] = n++; + } + for (n = 0, chain = reload_insn_chain; chain != 0; chain = chain->next) + chain_arr[n++] = chain; + qsort (chain_arr, n, sizeof (struct insn_chain *), + freq_p ? chain_freq_compare : chain_bb_compare); + ira_free (chain_insn_order); + for (i = 1; i < n - 1; i++) + { + chain_arr[i]->next = chain_arr[i + 1]; + chain_arr[i]->prev = chain_arr[i - 1]; + } + chain_arr[i]->next = NULL; + chain_arr[i]->prev = chain_arr[i - 1]; + reload_insn_chain = chain_arr[0]; + reload_insn_chain->prev = NULL; + reload_insn_chain->next = chain_arr[1]; + ira_free (basic_block_order_nums); + ira_free (chain_arr); +} + + + +/* All natural loops. */ +struct loops ira_loops; + +/* This is the main entry of IRA. 
*/ +static void +ira (FILE *f) +{ + int overall_cost_before, allocated_reg_info_size; + bool loops_p; + int max_regno_before_ira, ira_max_point_before_emit; + int rebuild_p; + int saved_flag_ira_algorithm; + basic_block bb; + + timevar_push (TV_IRA); + + if (flag_ira_verbose < 10) + { + internal_flag_ira_verbose = flag_ira_verbose; + ira_dump_file = f; + } + else + { + internal_flag_ira_verbose = flag_ira_verbose - 10; + ira_dump_file = stderr; + } + + setup_prohibited_mode_move_regs (); + + df_note_add_problem (); + + if (optimize == 1) + { + df_live_add_problem (); + df_live_set_all_dirty (); + } +#ifdef ENABLE_CHECKING + df->changeable_flags |= DF_VERIFY_SCHEDULED; +#endif + df_analyze (); + df_clear_flags (DF_NO_INSN_RESCAN); + regstat_init_n_sets_and_refs (); + regstat_compute_ri (); + + /* If we are not optimizing, then this is the only place before + register allocation where dataflow is done. And that is needed + to generate these warnings. */ + if (warn_clobbered) + generate_setjmp_warnings (); + + rebuild_p = update_equiv_regs (); + +#ifndef IRA_NO_OBSTACK + gcc_obstack_init (&ira_obstack); +#endif + bitmap_obstack_initialize (&ira_bitmap_obstack); + if (optimize) + { + max_regno = max_reg_num (); + ira_reg_equiv_len = max_regno; + ira_reg_equiv_invariant_p + = (bool *) ira_allocate (max_regno * sizeof (bool)); + memset (ira_reg_equiv_invariant_p, 0, max_regno * sizeof (bool)); + ira_reg_equiv_const = (rtx *) ira_allocate (max_regno * sizeof (rtx)); + memset (ira_reg_equiv_const, 0, max_regno * sizeof (rtx)); + find_reg_equiv_invariant_const (); + if (rebuild_p) + { + timevar_push (TV_JUMP); + rebuild_jump_labels (get_insns ()); + purge_all_dead_edges (); + timevar_pop (TV_JUMP); + } + } + + max_regno_before_ira = allocated_reg_info_size = max_reg_num (); + allocate_reg_info (); + setup_eliminable_regset (); + + ira_overall_cost = ira_reg_cost = ira_mem_cost = 0; + ira_load_cost = ira_store_cost = ira_shuffle_cost = 0; + ira_move_loops_num = 
ira_additional_jumps_num = 0; + + ira_assert (current_loops == NULL); + flow_loops_find (&ira_loops); + current_loops = &ira_loops; + saved_flag_ira_algorithm = flag_ira_algorithm; + if (optimize && number_of_loops () > (unsigned) IRA_MAX_LOOPS_NUM) + flag_ira_algorithm = IRA_ALGORITHM_CB; + + if (internal_flag_ira_verbose > 0 && ira_dump_file != NULL) + fprintf (ira_dump_file, "Building IRA IR\n"); + loops_p = ira_build (optimize + && (flag_ira_algorithm == IRA_ALGORITHM_REGIONAL + || flag_ira_algorithm == IRA_ALGORITHM_MIXED)); + if (optimize) + ira_color (); + else + ira_fast_allocation (); + + ira_max_point_before_emit = ira_max_point; + + ira_emit (loops_p); + + if (optimize) + { + max_regno = max_reg_num (); + + if (! loops_p) + ira_initiate_assign (); + else + { + expand_reg_info (allocated_reg_info_size); + setup_preferred_alternate_classes_for_new_pseudos + (allocated_reg_info_size); + allocated_reg_info_size = max_regno; + + if (internal_flag_ira_verbose > 0 && ira_dump_file != NULL) + fprintf (ira_dump_file, "Flattening IR\n"); + ira_flattening (max_regno_before_ira, ira_max_point_before_emit); + /* New insns were generated: add notes and recalculate live + info. */ + df_analyze (); + + flow_loops_find (&ira_loops); + current_loops = &ira_loops; + + setup_allocno_assignment_flags (); + ira_initiate_assign (); + ira_reassign_conflict_allocnos (max_regno); + } + } + + setup_reg_renumber (); + + calculate_allocation_cost (); + +#ifdef ENABLE_IRA_CHECKING + if (optimize) + check_allocation (); +#endif + + delete_trivially_dead_insns (get_insns (), max_reg_num ()); + max_regno = max_reg_num (); + + /* Determine if the current function is a leaf before running IRA + since this can impact optimizations done by the prologue and + epilogue thus changing register elimination offsets. */ + current_function_is_leaf = leaf_function_p (); + + /* And the reg_equiv_memory_loc array. 
*/ + VEC_safe_grow (rtx, gc, reg_equiv_memory_loc_vec, max_regno); + memset (VEC_address (rtx, reg_equiv_memory_loc_vec), 0, + sizeof (rtx) * max_regno); + reg_equiv_memory_loc = VEC_address (rtx, reg_equiv_memory_loc_vec); + + if (max_regno != max_regno_before_ira) + { + regstat_free_n_sets_and_refs (); + regstat_free_ri (); + regstat_init_n_sets_and_refs (); + regstat_compute_ri (); + } + + allocate_initial_values (reg_equiv_memory_loc); + + overall_cost_before = ira_overall_cost; + if (optimize) + { + fix_reg_equiv_init (); + +#ifdef ENABLE_IRA_CHECKING + print_redundant_copies (); +#endif + + ira_spilled_reg_stack_slots_num = 0; + ira_spilled_reg_stack_slots + = ((struct ira_spilled_reg_stack_slot *) + ira_allocate (max_regno + * sizeof (struct ira_spilled_reg_stack_slot))); + memset (ira_spilled_reg_stack_slots, 0, + max_regno * sizeof (struct ira_spilled_reg_stack_slot)); + } + + timevar_pop (TV_IRA); + + timevar_push (TV_RELOAD); + df_set_flags (DF_NO_INSN_RESCAN); + build_insn_chain (); + + if (optimize) + ira_sort_insn_chain (true); + + reload_completed = !reload (get_insns (), optimize > 0); + + timevar_pop (TV_RELOAD); + + timevar_push (TV_IRA); + + if (optimize) + { + ira_free (ira_spilled_reg_stack_slots); + + ira_finish_assign (); + + } + if (internal_flag_ira_verbose > 0 && ira_dump_file != NULL + && overall_cost_before != ira_overall_cost) + fprintf (ira_dump_file, "+++Overall after reload %d\n", ira_overall_cost); + ira_destroy (); + + flow_loops_free (&ira_loops); + free_dominance_info (CDI_DOMINATORS); + FOR_ALL_BB (bb) + bb->loop_father = NULL; + current_loops = NULL; + + flag_ira_algorithm = saved_flag_ira_algorithm; + + regstat_free_ri (); + regstat_free_n_sets_and_refs (); + + if (optimize) + { + cleanup_cfg (CLEANUP_EXPENSIVE); + + ira_free (ira_reg_equiv_invariant_p); + ira_free (ira_reg_equiv_const); + } + + bitmap_obstack_release (&ira_bitmap_obstack); +#ifndef IRA_NO_OBSTACK + obstack_free (&ira_obstack, NULL); +#endif + + /* The code 
after the reload has changed so much that at this point + we might as well just rescan everything. Not that + df_rescan_all_insns is not going to help here because it does not + touch the artificial uses and defs. */ + df_finish_pass (true); + if (optimize > 1) + df_live_add_problem (); + df_scan_alloc (NULL); + df_scan_blocks (); + + if (optimize) + df_analyze (); + + timevar_pop (TV_IRA); +} + + + +static bool +gate_ira (void) +{ + return flag_ira != 0; +} + +/* Run the integrated register allocator. */ +static unsigned int +rest_of_handle_ira (void) +{ + ira (dump_file); + return 0; +} + +struct rtl_opt_pass pass_ira = +{ + { + RTL_PASS, + "ira", /* name */ + gate_ira, /* gate */ + rest_of_handle_ira, /* execute */ + NULL, /* sub */ + NULL, /* next */ + 0, /* static_pass_number */ + 0, /* tv_id */ + 0, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + TODO_dump_func | + TODO_ggc_collect /* todo_flags_finish */ + } +}; diff --git a/gcc/ira.h b/gcc/ira.h new file mode 100644 index 00000000000..8a90785d5c6 --- /dev/null +++ b/gcc/ira.h @@ -0,0 +1,37 @@ +/* Communication between the Integrated Register Allocator (IRA) and + the rest of the compiler. + Copyright (C) 2006, 2007, 2008 + Free Software Foundation, Inc. + Contributed by Vladimir Makarov <vmakarov@redhat.com>. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. 
*/ + +extern void ira_init_once (void); +extern void ira_init (void); +extern void ira_finish_once (void); +extern rtx ira_eliminate_regs (rtx, enum machine_mode); +extern void ira_sort_insn_chain (bool); + +extern void ira_sort_regnos_for_alter_reg (int *, int, unsigned int *); +extern void ira_mark_allocation_change (int); +extern void ira_mark_memory_move_deletion (int, int); +extern bool ira_reassign_pseudos (int *, int, HARD_REG_SET, HARD_REG_SET *, + HARD_REG_SET *, bitmap); +extern rtx ira_reuse_stack_slot (int, unsigned int, unsigned int); +extern void ira_mark_new_stack_slot (rtx, int, unsigned int); +extern bool ira_better_spill_reload_regno_p (int *, int *, rtx, rtx, rtx); + diff --git a/gcc/local-alloc.c b/gcc/local-alloc.c index 98b67700808..e7bbcdaa169 100644 --- a/gcc/local-alloc.c +++ b/gcc/local-alloc.c @@ -298,7 +298,6 @@ static int equiv_init_movable_p (rtx, int); static int contains_replace_regs (rtx); static int memref_referenced_p (rtx, rtx); static int memref_used_between_p (rtx, rtx, rtx); -static void update_equiv_regs (void); static void no_equiv (rtx, const_rtx, void *); static void block_alloc (int); static int qty_sugg_compare (int, int); @@ -795,9 +794,11 @@ memref_used_between_p (rtx memref, rtx start, rtx end) into the using insn. If it succeeds, we can eliminate the register completely. - Initialize the REG_EQUIV_INIT array of initializing insns. */ + Initialize the REG_EQUIV_INIT array of initializing insns. -static void + Return non-zero if jump label rebuilding should be done. */ + +int update_equiv_regs (void) { rtx insn; @@ -1183,6 +1184,8 @@ update_equiv_regs (void) new_insn = emit_insn_before (PATTERN (equiv_insn), insn); REG_NOTES (new_insn) = REG_NOTES (equiv_insn); REG_NOTES (equiv_insn) = 0; + /* Rescan it to process the notes. 
*/ + df_insn_rescan (new_insn); /* Make sure this insn is recognized before reload begins, otherwise @@ -1227,6 +1230,7 @@ update_equiv_regs (void) end_alias_analysis (); free (reg_equiv); + return recorded_label_ref; } /* Mark REG as having no known equivalence. @@ -2442,6 +2446,12 @@ find_stack_regs (void) } #endif +static bool +gate_handle_local_alloc (void) +{ + return ! flag_ira; +} + /* Run old register allocator. Return TRUE if we must exit rest_of_compilation upon return. */ static unsigned int @@ -2517,7 +2527,7 @@ struct rtl_opt_pass pass_local_alloc = { RTL_PASS, "lreg", /* name */ - NULL, /* gate */ + gate_handle_local_alloc, /* gate */ rest_of_handle_local_alloc, /* execute */ NULL, /* sub */ NULL, /* next */ diff --git a/gcc/opts.c b/gcc/opts.c index 1de217eb3af..badc0a1a6cd 100644 --- a/gcc/opts.c +++ b/gcc/opts.c @@ -878,6 +878,11 @@ decode_options (unsigned int argc, const char **argv) flag_section_anchors = 0; } +#ifdef IRA_COVER_CLASSES + /* Use IRA if it is implemented for the target. */ + flag_ira = 1; +#endif + /* Originally we just set the variables if a particular optimization level, but with the advent of being able to change the optimization level for a function, we need to reset optimizations. */ @@ -1119,6 +1124,14 @@ decode_options (unsigned int argc, const char **argv) flag_reorder_blocks = 1; } +#ifndef IRA_COVER_CLASSES + if (flag_ira) + { + inform ("-fira does not work on this architecture"); + flag_ira = 0; + } +#endif + /* Save the current optimization options if this is the first call. 
*/ if (first_time_p) { @@ -1970,6 +1983,21 @@ common_handle_option (size_t scode, const char *arg, int value, warning (0, "unknown tls-model \"%s\"", arg); break; + case OPT_fira_algorithm_: + if (!strcmp (arg, "regional")) + flag_ira_algorithm = IRA_ALGORITHM_REGIONAL; + else if (!strcmp (arg, "CB")) + flag_ira_algorithm = IRA_ALGORITHM_CB; + else if (!strcmp (arg, "mixed")) + flag_ira_algorithm = IRA_ALGORITHM_MIXED; + else + warning (0, "unknown ira algorithm \"%s\"", arg); + break; + + case OPT_fira_verbose_: + flag_ira_verbose = value; + break; + case OPT_ftracer: flag_tracer_set = true; break; diff --git a/gcc/params.def b/gcc/params.def index 8dd1cf48eb5..c71b2b6500e 100644 --- a/gcc/params.def +++ b/gcc/params.def @@ -714,6 +714,11 @@ DEFPARAM (PARAM_DF_DOUBLE_QUEUE_THRESHOLD_FACTOR, "Multiplier used for determining the double-queueing threshold", 2, 0, 0) +DEFPARAM (PARAM_IRA_MAX_LOOPS_NUM, + "ira-max-loops-num", + "max loops number for regional RA", + 50, 0, 0) + /* Switch initialization conversion will refuse to create arrays that are bigger than this parameter times the number of switch branches. */ diff --git a/gcc/params.h b/gcc/params.h index 7e08ec741e5..8147b609848 100644 --- a/gcc/params.h +++ b/gcc/params.h @@ -167,6 +167,8 @@ typedef enum compiler_param PARAM_VALUE (PARAM_L2_CACHE_SIZE) #define USE_CANONICAL_TYPES \ PARAM_VALUE (PARAM_USE_CANONICAL_TYPES) +#define IRA_MAX_LOOPS_NUM \ + PARAM_VALUE (PARAM_IRA_MAX_LOOPS_NUM) #define SWITCH_CONVERSION_BRANCH_RATIO \ PARAM_VALUE (PARAM_SWITCH_CONVERSION_BRANCH_RATIO) #endif /* ! 
GCC_PARAMS_H */ diff --git a/gcc/passes.c b/gcc/passes.c index f47c6ec5b64..f45507f27e0 100644 --- a/gcc/passes.c +++ b/gcc/passes.c @@ -767,6 +767,7 @@ init_optimization_passes (void) NEXT_PASS (pass_subregs_of_mode_init); NEXT_PASS (pass_local_alloc); NEXT_PASS (pass_global_alloc); + NEXT_PASS (pass_ira); NEXT_PASS (pass_subregs_of_mode_finish); NEXT_PASS (pass_postreload); { diff --git a/gcc/postreload.c b/gcc/postreload.c index 5f6fec1e953..7659bab435a 100644 --- a/gcc/postreload.c +++ b/gcc/postreload.c @@ -1565,7 +1565,7 @@ move2add_note_store (rtx dst, const_rtx set, void *data ATTRIBUTE_UNUSED) static bool gate_handle_postreload (void) { - return (optimize > 0); + return (optimize > 0 && reload_completed); } diff --git a/gcc/regclass.c b/gcc/regclass.c index 386214dbe0d..b12d4168506 100644 --- a/gcc/regclass.c +++ b/gcc/regclass.c @@ -178,7 +178,7 @@ static enum reg_class reg_class_superclasses[N_REG_CLASSES][N_REG_CLASSES]; /* For each reg class, table listing all the classes contained in it. */ -static enum reg_class reg_class_subclasses[N_REG_CLASSES][N_REG_CLASSES]; +enum reg_class reg_class_subclasses[N_REG_CLASSES][N_REG_CLASSES]; /* For each pair of reg classes, a largest reg class contained in their union. */ @@ -211,24 +211,22 @@ bool have_regs_of_mode [MAX_MACHINE_MODE]; /* 1 if class does contain register of given mode. */ -static char contains_reg_of_mode [N_REG_CLASSES] [MAX_MACHINE_MODE]; - -typedef unsigned short move_table[N_REG_CLASSES]; +char contains_reg_of_mode [N_REG_CLASSES] [MAX_MACHINE_MODE]; /* Maximum cost of moving from a register in one class to a register in another class. Based on REGISTER_MOVE_COST. */ -static move_table *move_cost[MAX_MACHINE_MODE]; +move_table *move_cost[MAX_MACHINE_MODE]; /* Similar, but here we don't have to move if the first index is a subset of the second so in that case the cost is zero. 
*/ -static move_table *may_move_in_cost[MAX_MACHINE_MODE]; +move_table *may_move_in_cost[MAX_MACHINE_MODE]; /* Similar, but here we don't have to move if the first index is a superset of the second so in that case the cost is zero. */ -static move_table *may_move_out_cost[MAX_MACHINE_MODE]; +move_table *may_move_out_cost[MAX_MACHINE_MODE]; /* Keep track of the last mode we initialized move costs for. */ static int last_mode_for_init_move_cost; @@ -313,7 +311,7 @@ init_reg_sets (void) /* Initialize may_move_cost and friends for mode M. */ -static void +void init_move_cost (enum machine_mode m) { static unsigned short last_move_cost[N_REG_CLASSES][N_REG_CLASSES]; @@ -1024,6 +1022,7 @@ reg_preferred_class (int regno) { if (reg_pref == 0) return GENERAL_REGS; + return (enum reg_class) reg_pref[regno].prefclass; } @@ -2283,6 +2282,32 @@ auto_inc_dec_reg_p (rtx reg, enum machine_mode mode) } #endif + +/* Allocate space for reg info. */ +void +allocate_reg_info (void) +{ + int size = max_reg_num (); + + gcc_assert (! reg_pref && ! reg_renumber); + reg_renumber = XNEWVEC (short, size); + reg_pref = XCNEWVEC (struct reg_pref, size); + memset (reg_renumber, -1, size * sizeof (short)); +} + + +/* Resize reg info. The new elements will be uninitialized. */ +void +resize_reg_info (void) +{ + int size = max_reg_num (); + + gcc_assert (reg_pref && reg_renumber); + reg_renumber = XRESIZEVEC (short, reg_renumber, size); + reg_pref = XRESIZEVEC (struct reg_pref, reg_pref, size); +} + + /* Free up the space allocated by allocate_reg_info. */ void free_reg_info (void) @@ -2300,6 +2325,21 @@ free_reg_info (void) } } + + + +/* Set up preferred and alternate classes for REGNO as PREFCLASS and + ALTCLASS. 
*/ +void +setup_reg_classes (int regno, + enum reg_class prefclass, enum reg_class altclass) +{ + if (reg_pref == NULL) + return; + reg_pref[regno].prefclass = prefclass; + reg_pref[regno].altclass = altclass; +} + /* This is the `regscan' pass of the compiler, run just before cse and again just before loop. It finds the first and last use of each diff --git a/gcc/regmove.c b/gcc/regmove.c index 7de8d0d5b98..e25dbec7fe9 100644 --- a/gcc/regmove.c +++ b/gcc/regmove.c @@ -1117,7 +1117,8 @@ regmove_optimize (rtx f, int nregs) for (pass = 0; pass <= 2; pass++) { - if (! flag_regmove && pass >= flag_expensive_optimizations) + /* We need fewer optimizations for IRA. */ + if ((! flag_regmove || flag_ira) && pass >= flag_expensive_optimizations) goto done; if (dump_file) @@ -1165,7 +1166,9 @@ regmove_optimize (rtx f, int nregs) } } } - if (! flag_regmove) + + /* All optimizations important for IRA have been done. */ + if (! flag_regmove || flag_ira) continue; if (! find_matches (insn, &match)) diff --git a/gcc/regs.h b/gcc/regs.h index 20587ac27b2..00be997695f 100644 --- a/gcc/regs.h +++ b/gcc/regs.h @@ -262,9 +262,27 @@ extern int caller_save_needed; #define HARD_REGNO_CALL_PART_CLOBBERED(REGNO, MODE) 0 #endif +/* 1 if the corresponding class does contain register of given + mode. */ +extern char contains_reg_of_mode [N_REG_CLASSES] [MAX_MACHINE_MODE]; + +typedef unsigned short move_table[N_REG_CLASSES]; + +/* Maximum cost of moving from a register in one class to a register + in another class. */ +extern move_table *move_cost[MAX_MACHINE_MODE]; + /* Specify number of hard registers given machine mode occupy. */ extern unsigned char hard_regno_nregs[FIRST_PSEUDO_REGISTER][MAX_MACHINE_MODE]; +/* Similar, but here we don't have to move if the first index is a + subset of the second so in that case the cost is zero. 
*/ +extern move_table *may_move_in_cost[MAX_MACHINE_MODE]; + +/* Similar, but here we don't have to move if the first index is a + superset of the second so in that case the cost is zero. */ +extern move_table *may_move_out_cost[MAX_MACHINE_MODE]; + /* Return an exclusive upper bound on the registers occupied by hard register (reg:MODE REGNO). */ diff --git a/gcc/reload.c b/gcc/reload.c index 5a79c44e874..81637007995 100644 --- a/gcc/reload.c +++ b/gcc/reload.c @@ -1549,8 +1549,10 @@ push_reload (rtx in, rtx out, rtx *inloc, rtx *outloc, && reg_mentioned_p (XEXP (note, 0), in) /* Check that a former pseudo is valid; see find_dummy_reload. */ && (ORIGINAL_REGNO (XEXP (note, 0)) < FIRST_PSEUDO_REGISTER - || (!bitmap_bit_p (DF_LIVE_OUT (ENTRY_BLOCK_PTR), - ORIGINAL_REGNO (XEXP (note, 0))) + || (! bitmap_bit_p (flag_ira + ? DF_LR_OUT (ENTRY_BLOCK_PTR) + : DF_LIVE_OUT (ENTRY_BLOCK_PTR), + ORIGINAL_REGNO (XEXP (note, 0))) && hard_regno_nregs[regno][GET_MODE (XEXP (note, 0))] == 1)) && ! refers_to_regno_for_reload_p (regno, end_hard_regno (rel_mode, @@ -2027,7 +2029,9 @@ find_dummy_reload (rtx real_in, rtx real_out, rtx *inloc, rtx *outloc, can ignore the conflict). We must never introduce writes to such hardregs, as they would clobber the other live pseudo. See PR 20973. */ - || (!bitmap_bit_p (DF_LIVE_OUT (ENTRY_BLOCK_PTR), + || (!bitmap_bit_p (flag_ira + ? DF_LR_OUT (ENTRY_BLOCK_PTR) + : DF_LIVE_OUT (ENTRY_BLOCK_PTR), ORIGINAL_REGNO (in)) /* Similarly, only do this if we can be sure that the death note is still valid. global can assign some hardreg to diff --git a/gcc/reload.h b/gcc/reload.h index 3873f5c9746..17d8a3e04b2 100644 --- a/gcc/reload.h +++ b/gcc/reload.h @@ -1,6 +1,6 @@ -/* Communication between reload.c and reload1.c. - Copyright (C) 1987, 1991, 1992, 1993, 1994, 1995, 1997, 1998, - 1999, 2000, 2001, 2003, 2004, 2007 Free Software Foundation, Inc. +/* Communication between reload.c, reload1.c and the rest of compiler. 
+ Copyright (C) 1987, 1991, 1992, 1993, 1994, 1995, 1997, 1998, 1999, + 2000, 2001, 2003, 2004, 2007, 2008 Free Software Foundation, Inc. This file is part of GCC. @@ -209,8 +209,9 @@ struct insn_chain int block; /* The rtx of the insn. */ rtx insn; - /* Register life information: record all live hard registers, and all - live pseudos that have a hard register. */ + /* Register life information: record all live hard registers, and + all live pseudos that have a hard register. This set also + contains pseudos spilled by IRA. */ regset_head live_throughout; regset_head dead_or_set; diff --git a/gcc/reload1.c b/gcc/reload1.c index 3ee0fc37cfa..bb0d423e91b 100644 --- a/gcc/reload1.c +++ b/gcc/reload1.c @@ -44,6 +44,7 @@ along with GCC; see the file COPYING3. If not see #include "toplev.h" #include "except.h" #include "tree.h" +#include "ira.h" #include "df.h" #include "target.h" #include "dse.h" @@ -257,6 +258,9 @@ static unsigned int spill_stack_slot_width[FIRST_PSEUDO_REGISTER]; /* Record which pseudos needed to be spilled. */ static regset_head spilled_pseudos; +/* Record which pseudos changed their allocation in finish_spills. */ +static regset_head changed_allocation_pseudos; + /* Used for communication between order_regs_for_reload and count_pseudo. Used to avoid counting one pseudo twice. 
*/ static regset_head pseudos_counted; @@ -389,7 +393,7 @@ static void delete_caller_save_insns (void); static void spill_failure (rtx, enum reg_class); static void count_spilled_pseudo (int, int, int); static void delete_dead_insn (rtx); -static void alter_reg (int, int); +static void alter_reg (int, int, bool); static void set_label_offsets (rtx, rtx, int); static void check_eliminable_occurrences (rtx); static void elimination_effects (rtx, enum machine_mode); @@ -443,6 +447,8 @@ static rtx inc_for_reload (rtx, rtx, rtx, int); static void add_auto_inc_notes (rtx, rtx); #endif static void copy_eh_notes (rtx, rtx); +static void substitute (rtx *, const_rtx, rtx); +static bool gen_reload_chain_without_interm_reg_p (int, int); static int reloads_conflict (int, int); static rtx gen_reload (rtx, rtx, int, enum reload_type); static rtx emit_insn_if_valid_for_reload (rtx); @@ -501,6 +507,7 @@ init_reload (void) reload_startobj = XOBNEWVAR (&reload_obstack, char, 0); INIT_REG_SET (&spilled_pseudos); + INIT_REG_SET (&changed_allocation_pseudos); INIT_REG_SET (&pseudos_counted); } @@ -546,11 +553,11 @@ compute_use_by_pseudos (HARD_REG_SET *to, regset from) if (r < 0) { - /* reload_combine uses the information from - DF_LIVE_IN (BASIC_BLOCK), which might still - contain registers that have not actually been allocated - since they have an equivalence. */ - gcc_assert (reload_completed); + /* reload_combine uses the information from DF_LIVE_IN, + which might still contain registers that have not + actually been allocated since they have an + equivalence. */ + gcc_assert ((flag_ira && optimize) || reload_completed); } else add_to_hard_reg_set (to, PSEUDO_REGNO_MODE (regno), r); @@ -684,6 +691,9 @@ static int something_needs_operands_changed; /* Nonzero means we couldn't get enough spill regs. */ static int failure; +/* Temporary array of pseudo-register number. */ +static int *temp_pseudo_reg_arr; + /* Main entry point for the reload pass. 
FIRST is the first insn of the function being compiled. @@ -700,7 +710,7 @@ static int failure; int reload (rtx first, int global) { - int i; + int i, n; rtx insn; struct elim_table *ep; basic_block bb; @@ -883,12 +893,21 @@ reload (rtx first, int global) offsets_known_at = XNEWVEC (char, num_labels); offsets_at = (HOST_WIDE_INT (*)[NUM_ELIMINABLE_REGS]) xmalloc (num_labels * NUM_ELIMINABLE_REGS * sizeof (HOST_WIDE_INT)); - /* Alter each pseudo-reg rtx to contain its hard reg number. - Assign stack slots to the pseudos that lack hard regs or equivalents. + /* Alter each pseudo-reg rtx to contain its hard reg number. Assign + stack slots to the pseudos that lack hard regs or equivalents. Do not touch virtual registers. */ - for (i = LAST_VIRTUAL_REGISTER + 1; i < max_regno; i++) - alter_reg (i, -1); + temp_pseudo_reg_arr = XNEWVEC (int, max_regno - LAST_VIRTUAL_REGISTER - 1); + for (n = 0, i = LAST_VIRTUAL_REGISTER + 1; i < max_regno; i++) + temp_pseudo_reg_arr[n++] = i; + + if (flag_ira && optimize) + /* Ask IRA to order pseudo-registers for better stack slot + sharing. */ + ira_sort_regnos_for_alter_reg (temp_pseudo_reg_arr, n, reg_max_ref_width); + + for (i = 0; i < n; i++) + alter_reg (temp_pseudo_reg_arr[i], -1, false); /* If we have some registers we think can be eliminated, scan all insns to see if there is an insn that sets one of these registers to something @@ -1002,7 +1021,7 @@ reload (rtx first, int global) the loop. */ reg_equiv_memory_loc[i] = 0; reg_equiv_init[i] = 0; - alter_reg (i, -1); + alter_reg (i, -1, true); } } @@ -1036,7 +1055,12 @@ reload (rtx first, int global) calculate_needs_all_insns (global); - CLEAR_REG_SET (&spilled_pseudos); + if (! flag_ira || ! optimize) + /* Don't do it for IRA. We need this info because we don't + change live_throughout and dead_or_set for chains when IRA + is used. 
*/ + CLEAR_REG_SET (&spilled_pseudos); + did_spill = 0; something_changed = 0; @@ -1094,6 +1118,11 @@ reload (rtx first, int global) obstack_free (&reload_obstack, reload_firstobj); } + if (flag_ira && optimize) + /* Restore the original insn chain order for correct reload work + (e.g. for correct inheritance). */ + ira_sort_insn_chain (false); + /* If global-alloc was run, notify it of any register eliminations we have done. */ if (global) @@ -1163,6 +1192,7 @@ reload (rtx first, int global) regs. */ failed: + CLEAR_REG_SET (&changed_allocation_pseudos); CLEAR_REG_SET (&spilled_pseudos); reload_in_progress = 0; @@ -1333,6 +1363,8 @@ reload (rtx first, int global) VEC_free (rtx, gc, reg_equiv_memory_loc_vec); reg_equiv_memory_loc = 0; + free (temp_pseudo_reg_arr); + if (offsets_known_at) free (offsets_known_at); if (offsets_at) @@ -1573,10 +1605,24 @@ calculate_needs_all_insns (int global) { rtx set = single_set (insn); if (set - && SET_SRC (set) == SET_DEST (set) - && REG_P (SET_SRC (set)) - && REGNO (SET_SRC (set)) >= FIRST_PSEUDO_REGISTER) + && + ((SET_SRC (set) == SET_DEST (set) + && REG_P (SET_SRC (set)) + && REGNO (SET_SRC (set)) >= FIRST_PSEUDO_REGISTER) + || (REG_P (SET_SRC (set)) && REG_P (SET_DEST (set)) + && reg_renumber[REGNO (SET_SRC (set))] < 0 + && reg_renumber[REGNO (SET_DEST (set))] < 0 + && reg_equiv_memory_loc[REGNO (SET_SRC (set))] != NULL + && reg_equiv_memory_loc[REGNO (SET_DEST (set))] != NULL + && rtx_equal_p (reg_equiv_memory_loc + [REGNO (SET_SRC (set))], + reg_equiv_memory_loc + [REGNO (SET_DEST (set))])))) { + if (flag_ira && optimize) + /* Inform IRA about the insn deletion. */ + ira_mark_memory_move_deletion (REGNO (SET_DEST (set)), + REGNO (SET_SRC (set))); delete_insn (insn); /* Delete it from the reload chain. */ if (chain->prev) @@ -1665,6 +1711,10 @@ static int spill_cost[FIRST_PSEUDO_REGISTER]; only the first hard reg for a multi-reg pseudo. 
*/ static int spill_add_cost[FIRST_PSEUDO_REGISTER]; +/* Map of hard regno to pseudo regno currently occupying the hard + reg. */ +static int hard_regno_to_pseudo_regno[FIRST_PSEUDO_REGISTER]; + /* Update the spill cost arrays, considering that pseudo REG is live. */ static void @@ -1675,7 +1725,10 @@ count_pseudo (int reg) int nregs; if (REGNO_REG_SET_P (&pseudos_counted, reg) - || REGNO_REG_SET_P (&spilled_pseudos, reg)) + || REGNO_REG_SET_P (&spilled_pseudos, reg) + /* Ignore spilled pseudo-registers which can be here only if IRA + is used. */ + || (flag_ira && optimize && r < 0)) return; SET_REGNO_REG_SET (&pseudos_counted, reg); @@ -1683,10 +1736,12 @@ count_pseudo (int reg) gcc_assert (r >= 0); spill_add_cost[r] += freq; - nregs = hard_regno_nregs[r][PSEUDO_REGNO_MODE (reg)]; while (nregs-- > 0) - spill_cost[r + nregs] += freq; + { + hard_regno_to_pseudo_regno[r + nregs] = reg; + spill_cost[r + nregs] += freq; + } } /* Calculate the SPILL_COST and SPILL_ADD_COST arrays and determine the @@ -1704,6 +1759,8 @@ order_regs_for_reload (struct insn_chain *chain) memset (spill_cost, 0, sizeof spill_cost); memset (spill_add_cost, 0, sizeof spill_add_cost); + for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) + hard_regno_to_pseudo_regno[i] = -1; /* Count number of uses of each hard reg by pseudo regs allocated to it and then order them by decreasing use. First exclude hard registers @@ -1746,18 +1803,25 @@ static HARD_REG_SET used_spill_regs_local; static void count_spilled_pseudo (int spilled, int spilled_nregs, int reg) { + int freq = REG_FREQ (reg); int r = reg_renumber[reg]; int nregs = hard_regno_nregs[r][PSEUDO_REGNO_MODE (reg)]; - if (REGNO_REG_SET_P (&spilled_pseudos, reg) + /* Ignore spilled pseudo-registers which can be here only if IRA is + used. 
*/ + if ((flag_ira && optimize && r < 0) + || REGNO_REG_SET_P (&spilled_pseudos, reg) || spilled + spilled_nregs <= r || r + nregs <= spilled) return; SET_REGNO_REG_SET (&spilled_pseudos, reg); - spill_add_cost[r] -= REG_FREQ (reg); + spill_add_cost[r] -= freq; while (nregs-- > 0) - spill_cost[r + nregs] -= REG_FREQ (reg); + { + hard_regno_to_pseudo_regno[r + nregs] = -1; + spill_cost[r + nregs] -= freq; + } } /* Find reload register to use for reload number ORDER. */ @@ -1769,11 +1833,13 @@ find_reg (struct insn_chain *chain, int order) struct reload *rl = rld + rnum; int best_cost = INT_MAX; int best_reg = -1; - unsigned int i, j; + unsigned int i, j, n; int k; HARD_REG_SET not_usable; HARD_REG_SET used_by_other_reload; reg_set_iterator rsi; + static int regno_pseudo_regs[FIRST_PSEUDO_REGISTER]; + static int best_regno_pseudo_regs[FIRST_PSEUDO_REGISTER]; COPY_HARD_REG_SET (not_usable, bad_spill_regs); IOR_HARD_REG_SET (not_usable, bad_spill_regs_global); @@ -1791,7 +1857,11 @@ find_reg (struct insn_chain *chain, int order) for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) { +#ifdef REG_ALLOC_ORDER + unsigned int regno = reg_alloc_order[i]; +#else unsigned int regno = i; +#endif if (! TEST_HARD_REG_BIT (not_usable, regno) && ! TEST_HARD_REG_BIT (used_by_other_reload, regno) @@ -1810,6 +1880,38 @@ find_reg (struct insn_chain *chain, int order) } if (! ok) continue; + + if (flag_ira && optimize) + { + /* Ask IRA to find a better pseudo-register for + spilling. 
*/ + for (n = j = 0; j < this_nregs; j++) + { + int r = hard_regno_to_pseudo_regno[regno + j]; + + if (r < 0) + continue; + if (n == 0 || regno_pseudo_regs[n - 1] != r) + regno_pseudo_regs[n++] = r; + } + regno_pseudo_regs[n++] = -1; + if (best_reg < 0 + || ira_better_spill_reload_regno_p (regno_pseudo_regs, + best_regno_pseudo_regs, + rl->in, rl->out, + chain->insn)) + { + best_reg = regno; + for (j = 0;; j++) + { + best_regno_pseudo_regs[j] = regno_pseudo_regs[j]; + if (regno_pseudo_regs[j] < 0) + break; + } + } + continue; + } + if (rl->in && REG_P (rl->in) && REGNO (rl->in) == regno) this_cost--; if (rl->out && REG_P (rl->out) && REGNO (rl->out) == regno) @@ -1857,6 +1959,7 @@ find_reg (struct insn_chain *chain, int order) { gcc_assert (spill_cost[best_reg + i] == 0); gcc_assert (spill_add_cost[best_reg + i] == 0); + gcc_assert (hard_regno_to_pseudo_regno[best_reg + i] == -1); SET_HARD_REG_BIT (used_spill_regs_local, best_reg + i); } return 1; @@ -2026,7 +2129,7 @@ delete_dead_insn (rtx insn) can share one stack slot. */ static void -alter_reg (int i, int from_reg) +alter_reg (int i, int from_reg, bool dont_share_p) { /* When outputting an inline function, this can happen for a reg that isn't actually used. */ @@ -2059,7 +2162,15 @@ alter_reg (int i, int from_reg) unsigned int total_size = MAX (inherent_size, reg_max_ref_width[i]); unsigned int min_align = reg_max_ref_width[i] * BITS_PER_UNIT; int adjust = 0; - + bool shared_p = false; + + if (flag_ira && optimize) + /* Mark the spill for IRA. */ + SET_REGNO_REG_SET (&spilled_pseudos, i); + x = (dont_share_p || ! flag_ira || ! optimize + ? NULL_RTX : ira_reuse_stack_slot (i, inherent_size, total_size)); + if (x) + shared_p = true; /* Each pseudo reg has an inherent size which comes from its own mode, and a total size which provides room for paradoxical subregs which refer to the pseudo reg in wider modes. @@ -2068,7 +2179,7 @@ alter_reg (int i, int from_reg) enough inherent space and enough total space. 
Otherwise, we allocate a new slot, making sure that it has no less inherent space, and no less total space, then the previous slot. */ - if (from_reg == -1) + else if (from_reg == -1 || (! dont_share_p && flag_ira && optimize)) { alias_set_type alias_set = new_alias_set (); @@ -2086,6 +2197,10 @@ alter_reg (int i, int from_reg) /* Nothing can alias this slot except this pseudo. */ set_mem_alias_set (x, alias_set); dse_record_singleton_alias_set (alias_set, mode); + + if (! dont_share_p && flag_ira && optimize) + /* Inform IRA about allocation a new stack slot. */ + ira_mark_new_stack_slot (x, i, total_size); } /* Reuse a stack slot if possible. */ @@ -2164,8 +2279,13 @@ alter_reg (int i, int from_reg) /* If we have a decl for the original register, set it for the memory. If this is a shared MEM, make a copy. */ - if (REG_EXPR (regno_reg_rtx[i]) - && DECL_P (REG_EXPR (regno_reg_rtx[i]))) + if (shared_p) + { + x = copy_rtx (x); + set_mem_attrs_from_reg (x, regno_reg_rtx[i]); + } + else if (REG_EXPR (regno_reg_rtx[i]) + && DECL_P (REG_EXPR (regno_reg_rtx[i]))) { rtx decl = DECL_RTL_IF_SET (REG_EXPR (regno_reg_rtx[i])); @@ -2441,7 +2561,7 @@ eliminate_regs_1 (rtx x, enum machine_mode mem_mode, rtx insn, /* There exists at least one use of REGNO that cannot be eliminated. Prevent the defining insn from being deleted. */ reg_equiv_init[regno] = NULL_RTX; - alter_reg (regno, -1); + alter_reg (regno, -1, true); } return x; @@ -3817,18 +3937,22 @@ finish_spills (int global) spill_reg_order[i] = -1; EXECUTE_IF_SET_IN_REG_SET (&spilled_pseudos, FIRST_PSEUDO_REGISTER, i, rsi) - { - /* Record the current hard register the pseudo is allocated to in - pseudo_previous_regs so we avoid reallocating it to the same - hard reg in a later pass. */ - gcc_assert (reg_renumber[i] >= 0); - - SET_HARD_REG_BIT (pseudo_previous_regs[i], reg_renumber[i]); - /* Mark it as no longer having a hard register home. */ - reg_renumber[i] = -1; - /* We will need to scan everything again. 
*/ - something_changed = 1; - } + if (! flag_ira || ! optimize || reg_renumber[i] >= 0) + { + /* Record the current hard register the pseudo is allocated to + in pseudo_previous_regs so we avoid reallocating it to the + same hard reg in a later pass. */ + gcc_assert (reg_renumber[i] >= 0); + + SET_HARD_REG_BIT (pseudo_previous_regs[i], reg_renumber[i]); + /* Mark it as no longer having a hard register home. */ + reg_renumber[i] = -1; + if (flag_ira && optimize) + /* Inform IRA about the change. */ + ira_mark_allocation_change (i); + /* We will need to scan everything again. */ + something_changed = 1; + } /* Retry global register allocation if possible. */ if (global) @@ -3853,24 +3977,50 @@ finish_spills (int global) } } - /* Retry allocating the spilled pseudos. For each reg, merge the - various reg sets that indicate which hard regs can't be used, - and call retry_global_alloc. - We change spill_pseudos here to only contain pseudos that did not - get a new hard register. */ - for (i = FIRST_PSEUDO_REGISTER; i < (unsigned)max_regno; i++) - if (reg_old_renumber[i] != reg_renumber[i]) - { - HARD_REG_SET forbidden; - COPY_HARD_REG_SET (forbidden, bad_spill_regs_global); - IOR_HARD_REG_SET (forbidden, pseudo_forbidden_regs[i]); - IOR_HARD_REG_SET (forbidden, pseudo_previous_regs[i]); - retry_global_alloc (i, forbidden); - if (reg_renumber[i] >= 0) - CLEAR_REGNO_REG_SET (&spilled_pseudos, i); - } + if (! flag_ira || ! optimize) + { + /* Retry allocating the spilled pseudos. For each reg, + merge the various reg sets that indicate which hard regs + can't be used, and call retry_global_alloc. We change + spill_pseudos here to only contain pseudos that did not + get a new hard register. 
*/ + for (i = FIRST_PSEUDO_REGISTER; i < (unsigned)max_regno; i++) + if (reg_old_renumber[i] != reg_renumber[i]) + { + HARD_REG_SET forbidden; + + COPY_HARD_REG_SET (forbidden, bad_spill_regs_global); + IOR_HARD_REG_SET (forbidden, pseudo_forbidden_regs[i]); + IOR_HARD_REG_SET (forbidden, pseudo_previous_regs[i]); + retry_global_alloc (i, forbidden); + if (reg_renumber[i] >= 0) + CLEAR_REGNO_REG_SET (&spilled_pseudos, i); + } + } + else + { + /* Retry allocating the pseudos spilled in IRA and the + reload. For each reg, merge the various reg sets that + indicate which hard regs can't be used, and call + ira_reassign_pseudos. */ + unsigned int n; + + for (n = 0, i = FIRST_PSEUDO_REGISTER; i < (unsigned) max_regno; i++) + if (reg_old_renumber[i] != reg_renumber[i]) + { + if (reg_renumber[i] < 0) + temp_pseudo_reg_arr[n++] = i; + else + CLEAR_REGNO_REG_SET (&spilled_pseudos, i); + } + if (ira_reassign_pseudos (temp_pseudo_reg_arr, n, + bad_spill_regs_global, + pseudo_forbidden_regs, pseudo_previous_regs, + &spilled_pseudos)) + something_changed = 1; + + } } - /* Fix up the register information in the insn chain. This involves deleting those of the spilled pseudos which did not get a new hard register home from the live_{before,after} sets. */ @@ -3879,9 +4029,14 @@ finish_spills (int global) HARD_REG_SET used_by_pseudos; HARD_REG_SET used_by_pseudos2; - AND_COMPL_REG_SET (&chain->live_throughout, &spilled_pseudos); - AND_COMPL_REG_SET (&chain->dead_or_set, &spilled_pseudos); - + if (! flag_ira || ! optimize) + { + /* Don't do it for IRA because IRA and the reload still can + assign hard registers to the spilled pseudos on next + reload iterations. */ + AND_COMPL_REG_SET (&chain->live_throughout, &spilled_pseudos); + AND_COMPL_REG_SET (&chain->dead_or_set, &spilled_pseudos); + } /* Mark any unallocated hard regs as available for spills. That makes inheritance work somewhat better. 
if (chain->need_reload) { REG_SET_TO_HARD_REG_SET (used_by_pseudos, &chain->live_throughout); REG_SET_TO_HARD_REG_SET (used_by_pseudos2, &chain->dead_or_set); IOR_HARD_REG_SET (used_by_pseudos, used_by_pseudos2); - /* Save the old value for the sanity test below. */ - COPY_HARD_REG_SET (used_by_pseudos2, chain->used_spill_regs); - compute_use_by_pseudos (&used_by_pseudos, &chain->live_throughout); compute_use_by_pseudos (&used_by_pseudos, &chain->dead_or_set); + /* Value of chain->used_spill_regs from previous iteration + may not be included in the value calculated here because + of the possible removal of caller-save insns (see function + delete_caller_save_insns). */ COMPL_HARD_REG_SET (chain->used_spill_regs, used_by_pseudos); AND_HARD_REG_SET (chain->used_spill_regs, used_spill_regs); - - /* Make sure we only enlarge the set. */ - gcc_assert (hard_reg_set_subset_p (used_by_pseudos2, - chain->used_spill_regs)); } } + CLEAR_REG_SET (&changed_allocation_pseudos); /* Let alter_reg modify the reg rtx's for the modified pseudos. */ for (i = FIRST_PSEUDO_REGISTER; i < (unsigned)max_regno; i++) { @@ -3911,7 +4064,9 @@ finish_spills (int global) if (reg_old_renumber[i] == regno) continue; - alter_reg (i, reg_old_renumber[i]); + SET_REGNO_REG_SET (&changed_allocation_pseudos, i); + + alter_reg (i, reg_old_renumber[i], false); reg_old_renumber[i] = regno; if (dump_file) { @@ -4295,8 +4450,8 @@ reload_as_needed (int live_known) be partially clobbered by the call. */ else if (CALL_P (insn)) { - AND_COMPL_HARD_REG_SET (reg_reloaded_valid, call_used_reg_set); - AND_COMPL_HARD_REG_SET (reg_reloaded_valid, reg_reloaded_call_part_clobbered); + AND_COMPL_HARD_REG_SET (reg_reloaded_valid, call_used_reg_set); + AND_COMPL_HARD_REG_SET (reg_reloaded_valid, reg_reloaded_call_part_clobbered); } } @@ -4967,6 +5122,126 @@ reloads_unique_chain_p (int r1, int r2) return true; } + +/* The recursive function changes all occurrences of WHAT in *WHERE + with REPL. 
*/ +static void +substitute (rtx *where, const_rtx what, rtx repl) +{ + const char *fmt; + int i; + enum rtx_code code; + + if (*where == 0) + return; + + if (*where == what || rtx_equal_p (*where, what)) + { + *where = repl; + return; + } + + code = GET_CODE (*where); + fmt = GET_RTX_FORMAT (code); + for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--) + { + if (fmt[i] == 'E') + { + int j; + + for (j = XVECLEN (*where, i) - 1; j >= 0; j--) + substitute (&XVECEXP (*where, i, j), what, repl); + } + else if (fmt[i] == 'e') + substitute (&XEXP (*where, i), what, repl); + } +} + +/* The function returns TRUE if chain of reload R1 and R2 (in any + order) can be evaluated without usage of intermediate register for + the reload containing another reload. It is important to see + gen_reload to understand what the function is trying to do. As an + example, let us have reload chain + + r2: const + r1: <something> + const + + and reload R2 got reload reg HR. The function returns true if + there is a correct insn HR = HR + <something>. Otherwise, + gen_reload will use intermediate register (and this is the reload + reg for R1) to reload <something>. + + We need this function to find a conflict for chain reloads. In our + example, if HR = HR + <something> is incorrect insn, then we cannot + use HR as a reload register for R2. If we do use it then we get a + wrong code: + + HR = const + HR = <something> + HR = HR + HR + +*/ +static bool +gen_reload_chain_without_interm_reg_p (int r1, int r2) +{ + bool result; + int regno, n, code; + rtx out, in, tem, insn; + rtx last = get_last_insn (); + + /* Make r2 a component of r1. */ + if (reg_mentioned_p (rld[r1].in, rld[r2].in)) + { + n = r1; + r1 = r2; + r2 = n; + } + gcc_assert (reg_mentioned_p (rld[r2].in, rld[r1].in)); + regno = rld[r1].regno >= 0 ? 
rld[r1].regno : rld[r2].regno; + gcc_assert (regno >= 0); + out = gen_rtx_REG (rld[r1].mode, regno); + in = copy_rtx (rld[r1].in); + substitute (&in, rld[r2].in, gen_rtx_REG (rld[r2].mode, regno)); + + /* If IN is a paradoxical SUBREG, remove it and try to put the + opposite SUBREG on OUT. Likewise for a paradoxical SUBREG on OUT. */ + if (GET_CODE (in) == SUBREG + && (GET_MODE_SIZE (GET_MODE (in)) + > GET_MODE_SIZE (GET_MODE (SUBREG_REG (in)))) + && (tem = gen_lowpart_common (GET_MODE (SUBREG_REG (in)), out)) != 0) + in = SUBREG_REG (in), out = tem; + + if (GET_CODE (in) == PLUS + && (REG_P (XEXP (in, 0)) + || GET_CODE (XEXP (in, 0)) == SUBREG + || MEM_P (XEXP (in, 0))) + && (REG_P (XEXP (in, 1)) + || GET_CODE (XEXP (in, 1)) == SUBREG + || CONSTANT_P (XEXP (in, 1)) + || MEM_P (XEXP (in, 1)))) + { + insn = emit_insn (gen_rtx_SET (VOIDmode, out, in)); + code = recog_memoized (insn); + result = false; + + if (code >= 0) + { + extract_insn (insn); + /* We want constrain operands to treat this insn strictly in + its validity determination, i.e., the way it would after + reload has completed. */ + result = constrain_operands (1); + } + + delete_insns_since (last); + return result; + } + + /* It looks like other cases in gen_reload are not possible for + chain reloads or do need an intermediate hard registers. */ + return true; +} + /* Return 1 if the reloads denoted by R1 and R2 cannot share a register. Return 0 otherwise. 
@@ -5016,7 +5291,8 @@ reloads_conflict (int r1, int r2) case RELOAD_FOR_OPERAND_ADDRESS: return (r2_type == RELOAD_FOR_INPUT || r2_type == RELOAD_FOR_INSN || (r2_type == RELOAD_FOR_OPERAND_ADDRESS - && !reloads_unique_chain_p (r1, r2))); + && (!reloads_unique_chain_p (r1, r2) + || !gen_reload_chain_without_interm_reg_p (r1, r2)))); case RELOAD_FOR_OPADDR_ADDR: return (r2_type == RELOAD_FOR_INPUT @@ -6724,7 +7000,10 @@ emit_input_reload_insns (struct insn_chain *chain, struct reload *rl, && REG_N_SETS (REGNO (old)) == 1) { reg_renumber[REGNO (old)] = REGNO (reloadreg); - alter_reg (REGNO (old), -1); + if (flag_ira && optimize) + /* Inform IRA about the change. */ + ira_mark_allocation_change (REGNO (old)); + alter_reg (REGNO (old), -1, false); } special = 1; } @@ -8161,7 +8440,7 @@ delete_output_reload (rtx insn, int j, int last_reload_reg, rtx new_reload_reg) n_occurrences += count_occurrences (PATTERN (insn), eliminate_regs (substed, 0, NULL_RTX), 0); - for (i1 = reg_equiv_alt_mem_list [REGNO (reg)]; i1; i1 = XEXP (i1, 1)) + for (i1 = reg_equiv_alt_mem_list[REGNO (reg)]; i1; i1 = XEXP (i1, 1)) { gcc_assert (!rtx_equal_p (XEXP (i1, 0), substed)); n_occurrences += count_occurrences (PATTERN (insn), XEXP (i1, 0), 0); @@ -8262,7 +8541,10 @@ delete_output_reload (rtx insn, int j, int last_reload_reg, rtx new_reload_reg) /* For the debugging info, say the pseudo lives in this reload reg. */ reg_renumber[REGNO (reg)] = REGNO (new_reload_reg); - alter_reg (REGNO (reg), -1); + if (flag_ira && optimize) + /* Inform IRA about the change. */ + ira_mark_allocation_change (REGNO (reg)); + alter_reg (REGNO (reg), -1, false); } else { diff --git a/gcc/rtl.h b/gcc/rtl.h index 5281b1484db..42fc2ad6b64 100644 --- a/gcc/rtl.h +++ b/gcc/rtl.h @@ -1805,6 +1805,12 @@ rtx remove_list_elem (rtx, rtx *); /* regclass.c */ +/* Initialize may_move_cost and friends for mode M. */ +extern void init_move_cost (enum machine_mode); +/* Allocate register info memory. 
*/ +extern void allocate_reg_info (void); +/* Resize reg info. */ +extern void resize_reg_info (void); /* Free up register info memory. */ extern void free_reg_info (void); @@ -1815,6 +1821,7 @@ extern const char *decode_asm_operands (rtx, rtx *, rtx **, const char **, extern enum reg_class reg_preferred_class (int); extern enum reg_class reg_alternate_class (int); +extern void setup_reg_classes (int, enum reg_class, enum reg_class); extern void split_all_insns (void); extern unsigned int split_all_insns_noflow (void); @@ -2183,12 +2190,16 @@ extern bool can_copy_p (enum machine_mode); extern rtx fis_get_condition (rtx); /* In global.c */ +#ifdef HARD_CONST +extern HARD_REG_SET eliminable_regset; +#endif extern void mark_elimination (int, int); extern void dump_global_regs (FILE *); #ifdef HARD_CONST /* Yes, this ifdef is silly, but HARD_REG_SET is not always defined. */ extern void retry_global_alloc (int, HARD_REG_SET); #endif +extern void build_insn_chain (void); /* In regclass.c */ extern int reg_classes_intersect_p (enum reg_class, enum reg_class); @@ -2214,6 +2225,7 @@ extern void dbr_schedule (rtx); /* In local-alloc.c */ extern void dump_local_alloc (FILE *); +extern int update_equiv_regs (void); /* In reload1.c */ extern int function_invariant_p (const_rtx); diff --git a/gcc/testsuite/gcc.dg/20080410-1.c b/gcc/testsuite/gcc.dg/20080410-1.c new file mode 100644 index 00000000000..ebd783dd05d --- /dev/null +++ b/gcc/testsuite/gcc.dg/20080410-1.c @@ -0,0 +1,28 @@ +/* { dg-do compile { target "sh-*-*" } } */ +/* { dg-options "-O0 -m4 -ml -fira" } */ +/* { dg-final { scan-assembler-not "add\tr0,r0" } } */ + +/* This test checks that chain reloads conflict. If they don't + conflict, the same hard register R0 is used for both reloads + but in this case the second reload needs an intermediate register + (which is the reload register). 
As the result we have the + following code + + mov #4,r0 -- first reload + mov r14,r0 -- second reload + add r0,r0 -- second reload + + The right code should be + + mov #4,r0 -- first reload + mov r14,r1 -- second reload + add r0,r1 -- second reload + +*/ + +_Complex float foo_float (); + +void bar_float () +{ + __real foo_float (); +} diff --git a/gcc/timevar.def b/gcc/timevar.def index a2693fcc199..94fa087754e 100644 --- a/gcc/timevar.def +++ b/gcc/timevar.def @@ -179,6 +179,8 @@ DEFTIMEVAR (TV_SMS , "sms modulo scheduling") DEFTIMEVAR (TV_SCHED , "scheduling") DEFTIMEVAR (TV_LOCAL_ALLOC , "local alloc") DEFTIMEVAR (TV_GLOBAL_ALLOC , "global alloc") +DEFTIMEVAR (TV_IRA , "integrated RA") +DEFTIMEVAR (TV_RELOAD , "reload") DEFTIMEVAR (TV_RELOAD_CSE_REGS , "reload CSE regs") DEFTIMEVAR (TV_SEQABSTR , "sequence abstraction") DEFTIMEVAR (TV_GCSE_AFTER_RELOAD , "load CSE after reload") diff --git a/gcc/toplev.c b/gcc/toplev.c index 38d8eaf80af..7ace61c2b23 100644 --- a/gcc/toplev.c +++ b/gcc/toplev.c @@ -66,6 +66,7 @@ along with GCC; see the file COPYING3. If not see #include "diagnostic.h" #include "params.h" #include "reload.h" +#include "ira.h" #include "dwarf2asm.h" #include "integrate.h" #include "real.h" @@ -270,6 +271,14 @@ int flag_next_runtime = 0; enum tls_model flag_tls_default = TLS_MODEL_GLOBAL_DYNAMIC; +/* Set the default algorithm for the integrated register allocator. */ + +enum ira_algorithm flag_ira_algorithm = IRA_ALGORITHM_MIXED; + +/* Set the default value for -fira-verbose. */ + +unsigned int flag_ira_verbose = 5; + /* Nonzero means change certain warnings into errors. Usually these are warnings about failure to conform to some standard. */ @@ -2009,6 +2018,7 @@ backend_init (void) save_register_info (); /* Initialize the target-specific back end pieces. */ + ira_init_once (); backend_init_target (); } @@ -2029,9 +2039,10 @@ lang_dependent_init_target (void) /* Do the target-specific parts of expr initialization. 
*/ init_expr_target (); - /* Although the actions of init_set_costs are language-independent, - it uses optabs, so we cannot call it from backend_init. */ + /* Although the actions of these functions are language-independent, + they use optabs, so we cannot call them from backend_init. */ init_set_costs (); + ira_init (); expand_dummy_function_end (); } @@ -2132,6 +2143,8 @@ finalize (void) statistics_fini (); finish_optimization_passes (); + ira_finish_once (); + if (mem_report) dump_memory_report (true); diff --git a/gcc/toplev.h b/gcc/toplev.h index 00646132d58..5555bf1e093 100644 --- a/gcc/toplev.h +++ b/gcc/toplev.h @@ -139,6 +139,11 @@ extern int flag_unroll_all_loops; extern int flag_unswitch_loops; extern int flag_cprop_registers; extern int time_report; +extern int flag_ira; +extern int flag_ira_coalesce; +extern int flag_ira_move_spills; +extern int flag_ira_share_save_slots; +extern int flag_ira_share_spill_slots; /* Things to do with target switches. */ extern void print_version (FILE *, const char *); diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h index acb05193b4b..f4e02e86a5e 100644 --- a/gcc/tree-pass.h +++ b/gcc/tree-pass.h @@ -469,6 +469,7 @@ extern struct rtl_opt_pass pass_sms; extern struct rtl_opt_pass pass_sched; extern struct rtl_opt_pass pass_local_alloc; extern struct rtl_opt_pass pass_global_alloc; +extern struct rtl_opt_pass pass_ira; extern struct rtl_opt_pass pass_postreload; extern struct rtl_opt_pass pass_clean_state; extern struct rtl_opt_pass pass_branch_prob; |