diff options
-rw-r--r-- | gcc/ChangeLog | 441 | ||||
-rw-r--r-- | gcc/config/ia64/ia64-protos.h | 4 | ||||
-rw-r--r-- | gcc/config/ia64/ia64.c | 2665 | ||||
-rw-r--r-- | gcc/config/ia64/ia64.h | 30 | ||||
-rw-r--r-- | gcc/config/ia64/ia64.md | 103 | ||||
-rw-r--r-- | gcc/config/ia64/itanium1.md | 1616 | ||||
-rw-r--r-- | gcc/config/ia64/itanium2.md | 1762 | ||||
-rw-r--r-- | gcc/doc/invoke.texi | 8 | ||||
-rw-r--r-- | gcc/doc/md.texi | 114 | ||||
-rw-r--r-- | gcc/doc/tm.texi | 34 | ||||
-rw-r--r-- | gcc/genattr.c | 5 | ||||
-rw-r--r-- | gcc/genattrtab.c | 8 | ||||
-rw-r--r-- | gcc/genattrtab.h | 2 | ||||
-rw-r--r-- | gcc/genautomata.c | 1555 | ||||
-rw-r--r-- | gcc/haifa-sched.c | 340 | ||||
-rw-r--r-- | gcc/rtl.def | 89 | ||||
-rw-r--r-- | gcc/sched-deps.c | 136 | ||||
-rw-r--r-- | gcc/sched-ebb.c | 13 | ||||
-rw-r--r-- | gcc/sched-rgn.c | 64 | ||||
-rw-r--r-- | gcc/target-def.h | 6 | ||||
-rw-r--r-- | gcc/target.h | 24 |
21 files changed, 6770 insertions, 2249 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 406d333604c..edae8315f57 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,444 @@ +2003-01-09 Vladimir Makarov <vmakarov@redhat.com> + + The following changes are merged from itanium-sched-branch: + + 2003-01-08 David Edelsohn <edelsohn@gnu.org> + + * doc/md.texi: Clarify assignment of units to automata description. + + 2003-01-08 Vladimir Makarov <vmakarov@redhat.com> + + * genautomata.c (unit_decl): Remove members + `the_same_automaton_unit' and + `the_same_automaton_message_reported_p'. + (process_unit_to_form_the_same_automaton_unit_lists, + form_the_same_automaton_unit_lists_from_regexp, + form_the_same_automaton_unit_lists, the_same_automaton_lists): + Remove them. + (annotation_message_reported_p): New global variable. + (check_unit_distribution_in_reserv, + check_regexp_units_distribution): New functions. + (check_unit_distributions_to_automata): Rewrite it. + + 2003-01-04 Vladimir Makarov <vmakarov@redhat.com> + + * genautomata.c (form_the_same_automaton_unit_lists_from_regexp): + Use continue instead of break if cycle is too big. + + 2002-12-20 Vladimir Makarov <vmakarov@redhat.com> + + * genautomata.c (check_unit_distributions_to_automata): Output at + most one message for a unit. + (process_unit_to_form_the_same_automaton_unit_lists): Check + automaton of units instead of units themself. + + * doc/md.texi: Describe the constraint about assigning unit to + automata. + + 2002-12-20 Jan Hubicka <jH@suse.cz> + Vladimir Makarov <vmakarov@redhat.com> + + * genautomata.c (unit_decl): Add new members `min_occ_cycle_num' + and `in_set_p'. + (gen_cpu_unit): Initialize the new members. + (process_regexp_cycles): Calculate minimal finish cycle too. Set + up `min_occ_cycle_num'. + (evaluate_max_reserv_cycles): Change the function call. + (CLEAR_BIT): New macro. + (states_union, state_shift): Use the mask. + (initiate_excl_sets, form_reserv_sets_list): Set up `in_set_p'. + (form_reservs_matter): New function. 
+ (make_automaton): Call the function and use the mask. + (estimate_one_automaton_bound): Take `min_occ_cycle_num' into + account. + + 2002-12-17 Vladimir Makarov <vmakarov@redhat.com> + + * config/ia64/itanium2.md (lfetch): Change the insn reservation. + + 2002-12-17 Vladimir Makarov <vmakarov@redhat.com> + + * config/ia64/ia64.c (bundling): Try to insert 2 nops for M insn + for Itanium. + + 2002-12-17 Vladimir Makarov <vmakarov@redhat.com> + + * config/ia64/ia64.c (ia64_override_options): Make itanium2 as + default cpu. + + 2002-12-17 Vladimir Makarov <vmakarov@redhat.com> + 2002-10-31 Dale Johannesen <dalej@apple.com> + + * haifa-sched.c (find_set_reg_weight): New function. + (find_insn_reg_weight): Use the new function. + (schedule_block): Do sorting ready queue always + after insn issue. + + 2002-11-27 Vladimir Makarov <vmakarov@redhat.com> + + * config/ia64/ia64.c (bundling): Use MFI template instead of MLX. + + 2002-11-19 Vladimir Makarov <vmakarov@redhat.com> + + * haifa-sched.c (choice_entry): New structure. + (choice_stack, cycle_issued_insns): New variables. + (max_issue): Rewrite it. + (choose_ready): Set up ready_try for unknown insns too. + (schedule_block): Allocate and free choice_stack. Set up + and modify cycle_issued_insns. + + * config/ia64/ia64.c (issue_nops_and_insn): Combine insn issue + with and without filling the bundle. + (bundling): Combine calls of issue_nops_and_insn. + + 2002-10-17 Vladimir Makarov <vmakarov@redhat.com> + + * config/ia64/itanium1.md: New file. + + * config/ia64/itanium2.md: New file. + + * config/ia64/ia64.md: Move DFA descriptions into the new files. + Remove the old pipeline description. + + * config/ia64/ia64.c (ia64_override_options): Add aliases of + itanium processor names. + + 2002-10-16 Vladimir Makarov <vmakarov@redhat.com> + + * config/ia64/ia64.c (bundling): Print states for Itanium2 too. + (ia64_reorg): Set up queried unit codes for Itanium2 too. + + * config/ia64/ia64.md: Add descriptions for Itanium2. 
+ + 2002-10-08 Vladimir Makarov <vmakarov@redhat.com> + + * config/ia64/ia64.h (processor_type): New enumeration. + (ia64_tune, ia64_tune_string): New external declarations. + (TARGET_OPTIONS): Add option `tune='. + + * config/ia64/ia64.c (ia64_tune, ia64_tune_string): New global + variables. + (ia64_override_options): Set up `ia64_tune'. + (ia64_sched_reorder2): Set up `clocks' only for Itanium. + (ia64_dfa_new_cycle): Set up `add_cycles' only for Itanium. + (bundling): Add nops for MM-insns only for Itanium. + (ia64_reorg): Allocate and free `clocks' and `add_cycles' only for + Itanium. + + * config/ia64/ia64.md (cpu): New attribute. + (DFA description): Enable it only for Itanium. + + 2002-10-08 Vladimir Makarov <vmakarov@redhat.com> + Richard Henderson <rth@redhat.com> + + * config/ia64/ia64.h (MASK_TUNE_STOP_BITS): Rename it to + MASK_EARLY_STOP_BITS. + (TARGET_TUNE_STOP_BITS): Rename it to TARGET_EARLY_STOP_BITS. + (TARGET_SWITCHES): Rename option `tune-stop-bits' to + `early-stop-bits'. + + * config/ia64/ia64.c (ia64_dfa_new_cycle, + final_emit_insn_group_barriers): Use TARGET_EARLY_STOP_BITS + instead of TARGET_TUNE_STOP_BITS. + + * doc/invoke.texi: Rename option `-mtune-stop-bits' to + `-mearly-stop-bits'. + + * config/ia64/ia64.c (automata_option "v"): Comment it. + + 2002-10-07 Vladimir Makarov <vmakarov@redhat.com> + + * config/ia64/ia64.h (MASK_TUNE_STOP_BITS, TARGET_TUNE_STOP_BITS): + New macros. + (TARGET_SWITCHES): Add entries for the new option. + + * config/ia64/ia64.c (dfa_stop_insn, last_scheduled_insn, rtx + dfa_pre_cycle_insn, ia64_nop): Don't make them as roots for GC. + (stops_p, stop_before_p, clocks_length, clocks, add_cycles): New + global variables. + (ia64_sched_reorder2): Set up `clocks'. + (ia64_variable_issue): Set up `stops_p' and reset `stop_before_p'. + (ia64_dfa_new_cycle): Set up add_cycle. Permit sorting ready + queue when TARGET_TUNE_STOP_BITS. + (bundling): Insert additional nops for MM-insns. 
+ (final_emit_insn_group_barriers): Add insertion of stop bits + according to `stops_p'. + (ia64_reorg): Initiate the new variables. + + * doc/invoke.texi: Add description of option `-mtune-stop-bits'. + + 2002-10-02 Vladimir Makarov <vmakarov@redhat.com> + + * haifa-sched.c (schedule_block): Modify INSN_TICK of depended + insns at the end of block insn scheduling. + + 2002-09-30 Vladimir Makarov <vmakarov@redhat.com> + + * sched-deps.c (remove_dependence, group_leader): Remove it. + (add_dependence): Add dependence to group leader too. + (set_sched_group_p): Add dependence to the first insn of the + schedule group too. + (sched_analyze_insn): Make dependence to jump as anti-dependence. + Change true dependence by anti-dependence when + `reg_pending_barrier'. + + * sched-rgn.c (init_ready_list, can_schedule_ready_p, + add_branch_dependences): Ignore schedule groups. + + * sched-ebb.c (init_ready_list): Ditto. + + * (move_insn, set_priorities): Ditto. + + * config/ia64/ia64.c (ia64_sched_init): Check that schedule group + flag is clear after reload. + (adjust_cost): Change cost only for output dependencies. + + * config/ia64/ia64.md: Add more insns into bypass for MM-insns. + + 2002-09-26 Vladimir Makarov <vmakarov@redhat.com> + + * Makefile.in (sched-ebb.o): Add `$(TARGET_H)' to the entry. + + * target.h (gcc_target): Add member + `dependencies_evaluation_hook'. + + * target-def.h (TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK): New + macro. + (TARGET_SCHED): Add initialization of the new member. + + * sched-ebb.c: Include `target.h'. + (schedule_ebb): Call `dependencies_evaluation_hook'. + + * sched-rgn.c (schedule_region): Call + `dependencies_evaluation_hook'. + + * config/ia64/ia64.c (TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK): + New macro. + (ia64_dependencies_evaluation_hook): New function. + + * doc/tm.texi (TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK): + Describe the new hook. 
+ + 2002-09-25 Vladimir Makarov <vmakarov@redhat.com> + + * target.h (gcc_target): Add members + `first_cycle_multipass_dfa_lookahead_guard' and `dfa_new_cycle'. + + * target-def.h (TARGET_SCHED_DFA_NEW_CYCLE, + TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD): New + macros. + (TARGET_SCHED): Add initiatialization of the new members. + + * haifa-sched.c (schedule_insn): Update last_clock_var for the 1st + insn scheduling too. + (choose_ready): Use `first_cycle_multipass_dfa_lookahead_guard' to + initialize `ready_try'. + (schedule_block): Use `dfa_new_cycle'. Sort `ready' only unless + `dfa_new_cycle' says not to do it. + + * config/ia64/ia64.md: Add DFA Itanium 1 description for insn + bundling. + + * config/ia64/ia64.h (CPU_UNITS_QUERY): New macro. + + * config/ia64/ia64.c: Include `hashtab.h'. + (ia64_first_cycle_multipass_dfa_lookahead_guard, + ia64_dfa_new_cycle, final_emit_insn_group_barriers, + ia64_dfa_sched_reorder, get_free_bundle_state, free_bundle_state, + initiate_bundle_states, finish_bundle_states, bundle_state_hash, + bundle_state_eq_p, insert_bundle_state, + initiate_bundle_state_table, finish_bundle_state_table, + try_issue_nops, try_issue_insn, issue_nops_and_insn, get_max_pos, + get_template, get_next_important_insn, bundling): New functions. + (ia64_internal_sched_reorder): Remove it. + (TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD, + TARGET_SCHED_DFA_NEW_CYCLE): New macros. + (ia64_safe_itanium_requires_unit0): Remove it. + (group_barrier_needed_p): Place group barrier right before a real + insn. + (bundle, ia64_packet, NR_PACKETS, type_names, packets, packets): + Remove them. + (bundle_name): New variable. + (_0mii_, _0mmi_, _0mfi_, _0mmf_, _0bbb_, _0mbb_, _0mib_, _0mmb_, + _0mfb_, _0mlx_, _1mii_, _1mmi_, _1mfi_, _1mmf_, _1bbb_, _1mbb_, + _1mib_, _1mmb_, _1mfb_, _1mlx_, pos_1, pos_2, pos_3, pos_4, pos_5, + pos_6, dfa_stop_insn, last_scheduled_insn, dfa_state_size, + temp_dfa_state, prev_cycle_state): New global variables. 
+ (insn_matches_slot, maybe_rotate, finish_last_head, + rotate_one_bundle, rotate_one_bundles, nop_cycles_until, + cycle_end_fill_slots, packet_matches_p, get_split, find_best_insn, + find_best_packet, itanium_reorder, dump_current_packet, + schedule_stop, gen_nop_type, ia64_emit_nops): Remove them. + (sched_data, sched_ready, sched_types): Remove them. + (ia64_sched_init): Initiate only `last_scheduled_insn' and call + `init_insn_group_barriers'. + (ia64_sched_reorder, ia64_sched_reorder2): Call + ia64_dfa_sched_reorder. + (ia64_variable_issue): Rewrite it. + (bundle_state): New structure. + (index_to_bundle_states, bundle_states_num, + allocated_bundle_states_chain, free_bundle_state_chain): New + global variables. + (ia64_sched_finish): Add stop bits and call `bundling' after the + 2nd insn scheduling. + (ia64_use_dfa_pipeline_interface): Return zero always. + (ia64_first_cycle_multipass_dfa_lookahead): Return 6 for the 2nd + insn scheduling. + (ia64_init_dfa_pre_cycle_insn): Initialize `dfa_state_size', + `temp_dfa_state', `prev_cycle_state', and `dfa_stop_insn'. + (ia64_reorg): Add bundling insns. + + * doc/tm.texi + (TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD, + TARGET_SCHED_DFA_NEW_CYCLE): Describe the new hooks. + + 2002-09-23 Vladimir Makarov <vmakarov@redhat.com> + + * config/ia64/ia64.md: Add Itanium1 DFA description. + (itanium_class): Add `nop' and `pre_cycle'. Add + define_function_unit for `nop'. + (nop): Change attribute `itanium_class'. + (pre_cycle): New define_insn. + + * config/ia64/ia64-protos.h (bundling_p): New external variable. + (ia64_st_address_bypass_p, ia64_ld_address_bypass_p, + ia64_produce_address_p): New function prototypes. + + * config/ia64/ia64.c (bundling_p): New global variable. + (ia64_use_dfa_pipeline_interface, + ia64_first_cycle_multipass_dfa_lookahead, + ia64_init_dfa_pre_cycle_insn, ia64_dfa_pre_cycle_insn): New + functions. 
+ (TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE, + TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD, + TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN, + TARGET_SCHED_DFA_PRE_CYCLE_INSN): New macros. + (ia64_sched_init, ia64_sched_reorder, ia64_sched_reorder2, + ia64_variable_issue, ia64_sched_finish): Do nothing before reload. + (dfa_pre_cycle_insn): New variable. + + 2002-09-20 Vladimir Makarov <vmakarov@redhat.com> + + * rtl.def (FINAL_PRESENCE_SET, FINAL_ABSENCE_SET): New + constructions. + + * genattrtab.h (gen_final_presence_set, gen_final_absence_set): + New function prototypes. + + * genattrtab.c (main): Process the new constructions. + + * genautomata.c (gen_presence_absence_set, + process_presence_absence_names, process_presence_absence_patterns, + add_presence_absence, check_absence_pattern_sets): Add parameter + `final_p'. + (unit_decl): Add new members `final_presence_list' and + `final_absence_list'. + (unit_pattern_rel_decl): Add new member `final_p'. + (gen_final_presence_set, gen_final_absence_set): New functions. + (process_decls): Use member `final_p'. + (temp_reserv): New global variable. + (reserv_sets_are_intersected): Add processing `final_presence_set' + and `final_absence_set'. + (initiate_states): Allocate `temp_reserv'. + (unit_final_presence_set_table, unit_final_absence_set_table): New + global variables. + (initiate_presence_absence_pattern_sets): Initiate them. + (NDFA_to_DFA): Fix typo. + (output_description): Output `final_presence_set' and + `final_absence_set'. + + * doc/md.texi (final_presence_set, final_absence_set): Describe + them. + + 2002-09-20 Vladimir Makarov <vmakarov@redhat.com> + + * genautomata.c (transform_3): Process a missing case (nothing on + unit place). + + 2002-09-20 Vladimir Makarov <vmakarov@redhat.com> + + * rtl.def (DEFINE_QUERY_CPU_UNIT, AUTOMATA_OPTION): Change + comments about queried units and the minimization. + + * doc/md.texi: Ditto. 
+ + * genautomata.c (create_composed_state): Return nonzero if the new + state has been created. + (first_cycle_unit_presence): New function. + (state_is_differed): Add new parameter. Use the new function. + Take queried units into account. + (partition_equiv_class): Pass additional parameter to + `state_is_differed'. + (merge_states): Process composed states too. + (build_automaton, create_automata, output_min_issue_delay_table, + output_tables, output_statistics): Output more information. + (output_reserved_units_table): Use function + `first_cycle_unit_presence'. + (output_tables): Output table of queried units even if the + minimization is switched on. + (write_automata): Output code for querying units even if the + minimization is switched on. + + 2002-09-19 Vladimir Makarov <vmakarov@redhat.com> + + * rtl.def (PRESENCE_SET, ABSENCE_SET): Add comments about extended + syntax of the constructions. + + * doc/md.texi (PRESENCE_SET, ABSENCE_SET): Add description of + extended syntax of the constructions. + + * genautomata.c (unit_rel_decl): Rename it to + `unit_pattern_rel_decl'. + (pattern_set_el, pattern_reserv): New structures. + (pattern_set_el_t, pattern_reserv_t): New types. + (gen_presence_absence_set): New function. + (process_presence_absence): Remove it. + (process_presence_absence_names, + process_presence_absence_patterns): New functions. + (get_presence_absence_set): Remove it. + (initiate_presence_absence_sets): Rename it on + `initiate_presence_absence_pattern_sets'. Use new function + `form_reserv_sets_list'. + (form_reserv_sets_list, check_presence_pattern_sets, + check_absence_pattern_sets, output_pattern_set_el_list): New + functions. + (unit_decl): Change types of members `presence_list' and + `absence_list'. + (unit_rel_decl): Rename member `names_num' to `all_names_num'. + (decl): Change types of members `excl', `presence', and `absence'. + (get_str_vect): Rename `par_flag' to `paren_p'. Add null element + at the end of the vector. 
+ (gen_cpu_unit, gen_query_cpu_unit, gen_bypass, gen_excl_set, + gen_automaton, gen_regexp_repeat, gen_regexp_allof, + gen_regexp_oneof, gen_regexp_sequence): Use boolean values. + (gen_presence_set, gen_absence_set): Use new function + `gen_presence_absence_set'. + (add_presence_absence): Process `pattern_list' instead of + `source_list'. + (process_decls): USe new functions + `process_presence_absence_names' and + `process_presence_absence_patterns'. + (reserv_sets_are_intersected): Use new function + `check_presence_pattern_sets'. + (presence_set, absence_set): Remove them. + (unit_presence_set_table, unit_absence_set_table): New global + variables. + (output_description): Use new function + `output_pattern_set_el_list'. + (generate): Use `initiate_presence_absence_pattern_sets'. + + 2002-09-18 Vladimir Makarov <vmakarov@redhat.com> + + * genattr.c (main): Add output of prototype of new interface + function `dfa_clean_insn_cache'. + + * genautomata.c (output_dfa_clean_insn_cache_func): New function. + (DFA_CLEAN_INSN_CACHE_FUNC_NAME): New macro. + (output_dfa_start_func): Use function `dfa_clean_insn_cache' in + the generated code. + (write_automata): Call the new function. + + Thu Jan 9 22:47:38 CET 2003 Jan Hubicka <jh@suse.cz> * i386.md (unit, prefix_0f, memory attributes): Hanlde sseicvt correctly. 
diff --git a/gcc/config/ia64/ia64-protos.h b/gcc/config/ia64/ia64-protos.h index 0aba9390aae..63b8572067e 100644 --- a/gcc/config/ia64/ia64-protos.h +++ b/gcc/config/ia64/ia64-protos.h @@ -27,7 +27,11 @@ extern GTY(()) rtx ia64_compare_op1; /* Functions defined in ia64.c */ +extern int bundling_p; #ifdef RTX_CODE +extern int ia64_st_address_bypass_p PARAMS((rtx, rtx)); +extern int ia64_ld_address_bypass_p PARAMS((rtx, rtx)); +extern int ia64_produce_address_p PARAMS((rtx)); extern int call_operand PARAMS((rtx, enum machine_mode)); extern int sdata_symbolic_operand PARAMS((rtx, enum machine_mode)); extern int got_symbolic_operand PARAMS((rtx, enum machine_mode)); diff --git a/gcc/config/ia64/ia64.c b/gcc/config/ia64/ia64.c index 3009d1c1200..039d11aaac7 100644 --- a/gcc/config/ia64/ia64.c +++ b/gcc/config/ia64/ia64.c @@ -47,6 +47,7 @@ Boston, MA 02111-1307, USA. */ #include "target.h" #include "target-def.h" #include "tm_p.h" +#include "hashtab.h" /* This is used for communication between ASM_OUTPUT_LABEL and ASM_OUTPUT_LABELREF. */ @@ -103,6 +104,12 @@ int ia64_tls_size = 22; /* String used with the -mtls-size= option. */ const char *ia64_tls_size_string; +/* Which cpu are we scheduling for. */ +enum processor_type ia64_tune; + +/* String used with the -tune= option. */ +const char *ia64_tune_string; + /* Determines whether we run our final scheduling pass or not. We always avoid the normal second scheduling pass. */ static int ia64_flag_schedule_insns2; @@ -111,7 +118,19 @@ static int ia64_flag_schedule_insns2; sections. */ unsigned int ia64_section_threshold; + +/* The following variable is used by the DFA insn scheduler. The value is + TRUE if we do insn bundling instead of insn scheduling. 
*/ +int bundling_p = 0; + +static int ia64_use_dfa_pipeline_interface PARAMS ((void)); +static int ia64_first_cycle_multipass_dfa_lookahead PARAMS ((void)); +static void ia64_dependencies_evaluation_hook PARAMS ((rtx, rtx)); +static void ia64_init_dfa_pre_cycle_insn PARAMS ((void)); +static rtx ia64_dfa_pre_cycle_insn PARAMS ((void)); +static int ia64_first_cycle_multipass_dfa_lookahead_guard PARAMS ((rtx)); +static int ia64_dfa_new_cycle PARAMS ((FILE *, int, rtx, int, int, int *)); static rtx gen_tls_get_addr PARAMS ((void)); static rtx gen_thread_pointer PARAMS ((void)); static int find_gr_spill PARAMS ((int)); @@ -132,6 +151,7 @@ static void fix_range PARAMS ((const char *)); static struct machine_function * ia64_init_machine_status PARAMS ((void)); static void emit_insn_group_barriers PARAMS ((FILE *, rtx)); static void emit_all_insn_group_barriers PARAMS ((FILE *, rtx)); +static void final_emit_insn_group_barriers PARAMS ((FILE *)); static void emit_predicate_relation_info PARAMS ((void)); static bool ia64_in_small_data_p PARAMS ((tree)); static void ia64_encode_section_info PARAMS ((tree, int)); @@ -157,12 +177,31 @@ static int ia64_issue_rate PARAMS ((void)); static int ia64_adjust_cost PARAMS ((rtx, rtx, rtx, int)); static void ia64_sched_init PARAMS ((FILE *, int, int)); static void ia64_sched_finish PARAMS ((FILE *, int)); -static int ia64_internal_sched_reorder PARAMS ((FILE *, int, rtx *, - int *, int, int)); +static int ia64_dfa_sched_reorder PARAMS ((FILE *, int, rtx *, int *, + int, int)); static int ia64_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int)); static int ia64_sched_reorder2 PARAMS ((FILE *, int, rtx *, int *, int)); static int ia64_variable_issue PARAMS ((FILE *, int, rtx, int)); +static struct bundle_state *get_free_bundle_state PARAMS ((void)); +static void free_bundle_state PARAMS ((struct bundle_state *)); +static void initiate_bundle_states PARAMS ((void)); +static void finish_bundle_states PARAMS ((void)); +static unsigned 
bundle_state_hash PARAMS ((const void *)); +static int bundle_state_eq_p PARAMS ((const void *, const void *)); +static int insert_bundle_state PARAMS ((struct bundle_state *)); +static void initiate_bundle_state_table PARAMS ((void)); +static void finish_bundle_state_table PARAMS ((void)); +static int try_issue_nops PARAMS ((struct bundle_state *, int)); +static int try_issue_insn PARAMS ((struct bundle_state *, rtx)); +static void issue_nops_and_insn PARAMS ((struct bundle_state *, int, + rtx, int)); +static int get_max_pos PARAMS ((state_t)); +static int get_template PARAMS ((state_t, int)); + +static rtx get_next_important_insn PARAMS ((rtx, rtx)); +static void bundling PARAMS ((FILE *, int, rtx, rtx)); + static void ia64_output_mi_thunk PARAMS ((FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, tree)); @@ -244,6 +283,27 @@ static const struct attribute_spec ia64_attribute_table[] = #undef TARGET_SCHED_REORDER2 #define TARGET_SCHED_REORDER2 ia64_sched_reorder2 +#undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK +#define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK ia64_dependencies_evaluation_hook + +#undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE +#define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE ia64_use_dfa_pipeline_interface + +#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD +#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD ia64_first_cycle_multipass_dfa_lookahead + +#undef TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN +#define TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN ia64_init_dfa_pre_cycle_insn +#undef TARGET_SCHED_DFA_PRE_CYCLE_INSN +#define TARGET_SCHED_DFA_PRE_CYCLE_INSN ia64_dfa_pre_cycle_insn + +#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD +#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD\ + ia64_first_cycle_multipass_dfa_lookahead_guard + +#undef TARGET_SCHED_DFA_NEW_CYCLE +#define TARGET_SCHED_DFA_NEW_CYCLE ia64_dfa_new_cycle + #ifdef HAVE_AS_TLS #undef TARGET_HAVE_TLS #define TARGET_HAVE_TLS true @@ -4209,6 +4269,23 @@ 
ia64_init_machine_status () void ia64_override_options () { + static struct pta + { + const char *const name; /* processor name or nickname. */ + const enum processor_type processor; + } + const processor_alias_table[] = + { + {"itanium", PROCESSOR_ITANIUM}, + {"itanium1", PROCESSOR_ITANIUM}, + {"merced", PROCESSOR_ITANIUM}, + {"itanium2", PROCESSOR_ITANIUM2}, + {"mckinley", PROCESSOR_ITANIUM2}, + }; + + int const pta_size = ARRAY_SIZE (processor_alias_table); + int i; + if (TARGET_AUTO_PIC) target_flags |= MASK_CONST_GP; @@ -4237,6 +4314,19 @@ ia64_override_options () ia64_tls_size = tmp; } + if (!ia64_tune_string) + ia64_tune_string = "itanium2"; + + for (i = 0; i < pta_size; i++) + if (! strcmp (ia64_tune_string, processor_alias_table[i].name)) + { + ia64_tune = processor_alias_table[i].processor; + break; + } + + if (i == pta_size) + error ("bad value (%s) for -tune= switch", ia64_tune_string); + ia64_flag_schedule_insns2 = flag_schedule_insns_after_reload; flag_schedule_insns_after_reload = 0; @@ -4249,20 +4339,9 @@ ia64_override_options () real_format_for_mode[TFmode - QFmode] = &ieee_extended_intel_128_format; } -static enum attr_itanium_requires_unit0 ia64_safe_itanium_requires_unit0 PARAMS((rtx)); static enum attr_itanium_class ia64_safe_itanium_class PARAMS((rtx)); static enum attr_type ia64_safe_type PARAMS((rtx)); -static enum attr_itanium_requires_unit0 -ia64_safe_itanium_requires_unit0 (insn) - rtx insn; -{ - if (recog_memoized (insn) >= 0) - return get_attr_itanium_requires_unit0 (insn); - else - return ITANIUM_REQUIRES_UNIT0_NO; -} - static enum attr_itanium_class ia64_safe_itanium_class (insn) rtx insn; @@ -5096,7 +5175,10 @@ group_barrier_needed_p (insn) abort (); } - if (first_instruction) + if (first_instruction && INSN_P (insn) + && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE + && GET_CODE (PATTERN (insn)) != USE + && GET_CODE (PATTERN (insn)) != CLOBBER) { need_barrier = 0; first_instruction = 0; @@ -5229,6 +5311,7 @@ 
emit_all_insn_group_barriers (dump, insns) } } } + static int errata_find_address_regs PARAMS ((rtx *, void *)); static void errata_emit_nops PARAMS ((rtx)); @@ -5374,92 +5457,92 @@ fixup_errata () } } -/* Instruction scheduling support. */ -/* Describe one bundle. */ -struct bundle -{ - /* Zero if there's no possibility of a stop in this bundle other than - at the end, otherwise the position of the optional stop bit. */ - int possible_stop; - /* The types of the three slots. */ - enum attr_type t[3]; - /* The pseudo op to be emitted into the assembler output. */ - const char *name; -}; +/* Instruction scheduling support. */ #define NR_BUNDLES 10 -/* A list of all available bundles. */ +/* A list of names of all available bundles. */ -static const struct bundle bundle[NR_BUNDLES] = +static const char *bundle_name [NR_BUNDLES] = { - { 2, { TYPE_M, TYPE_I, TYPE_I }, ".mii" }, - { 1, { TYPE_M, TYPE_M, TYPE_I }, ".mmi" }, - { 0, { TYPE_M, TYPE_F, TYPE_I }, ".mfi" }, - { 0, { TYPE_M, TYPE_M, TYPE_F }, ".mmf" }, + ".mii", + ".mmi", + ".mfi", + ".mmf", #if NR_BUNDLES == 10 - { 0, { TYPE_B, TYPE_B, TYPE_B }, ".bbb" }, - { 0, { TYPE_M, TYPE_B, TYPE_B }, ".mbb" }, + ".bbb", + ".mbb", #endif - { 0, { TYPE_M, TYPE_I, TYPE_B }, ".mib" }, - { 0, { TYPE_M, TYPE_M, TYPE_B }, ".mmb" }, - { 0, { TYPE_M, TYPE_F, TYPE_B }, ".mfb" }, - /* .mfi needs to occur earlier than .mlx, so that we only generate it if - it matches an L type insn. Otherwise we'll try to generate L type - nops. */ - { 0, { TYPE_M, TYPE_L, TYPE_X }, ".mlx" } + ".mib", + ".mmb", + ".mfb", + ".mlx" }; -/* Describe a packet of instructions. Packets consist of two bundles that - are visible to the hardware in one scheduling window. */ +/* Nonzero if we should insert stop bits into the schedule. */ -struct ia64_packet -{ - const struct bundle *t1, *t2; - /* Precomputed value of the first split issue in this packet if a cycle - starts at its beginning. 
*/ - int first_split; - /* For convenience, the insn types are replicated here so we don't have - to go through T1 and T2 all the time. */ - enum attr_type t[6]; -}; +int ia64_final_schedule = 0; -/* An array containing all possible packets. */ -#define NR_PACKETS (NR_BUNDLES * NR_BUNDLES) -static struct ia64_packet packets[NR_PACKETS]; +/* Codes of the corresponding queried units: */ -/* Map attr_type to a string with the name. */ +static int _0mii_, _0mmi_, _0mfi_, _0mmf_; +static int _0bbb_, _0mbb_, _0mib_, _0mmb_, _0mfb_, _0mlx_; -static const char *const type_names[] = -{ - "UNKNOWN", "A", "I", "M", "F", "B", "L", "X", "S" -}; +static int _1mii_, _1mmi_, _1mfi_, _1mmf_; +static int _1bbb_, _1mbb_, _1mib_, _1mmb_, _1mfb_, _1mlx_; -/* Nonzero if we should insert stop bits into the schedule. */ -int ia64_final_schedule = 0; +static int pos_1, pos_2, pos_3, pos_4, pos_5, pos_6; + +/* The following variable value is an insn group barrier. */ + +static rtx dfa_stop_insn; + +/* The following variable value is the last issued insn. */ + +static rtx last_scheduled_insn; + +/* The following variable value is size of the DFA state. */ + +static size_t dfa_state_size; + +/* The following variable value is pointer to a DFA state used as + temporary variable. */ + +static state_t temp_dfa_state = NULL; + +/* The following variable value is DFA state after issuing the last + insn. */ + +static state_t prev_cycle_state = NULL; + +/* The following array element values are TRUE if the corresponding + insn requires stop bits to be added before it. */ + +static char *stops_p; + +/* The following variable is used to set up the mentioned above array. */ + +static int stop_before_p = 0; + +/* The following variable value is length of the arrays `clocks' and + `add_cycles'. */ + +static int clocks_length; + +/* The following array element values are cycles on which the + corresponding insn will be issued. The array is used only for + Itanium1. 
*/ + +static int *clocks; + +/* The following array element values are numbers of cycles should be + added to improve insn scheduling for MM_insns for Itanium1. */ + +static int *add_cycles; -static int itanium_split_issue PARAMS ((const struct ia64_packet *, int)); static rtx ia64_single_set PARAMS ((rtx)); -static int insn_matches_slot PARAMS ((const struct ia64_packet *, enum attr_type, int, rtx)); static void ia64_emit_insn_before PARAMS ((rtx, rtx)); -static void maybe_rotate PARAMS ((FILE *)); -static void finish_last_head PARAMS ((FILE *, int)); -static void rotate_one_bundle PARAMS ((FILE *)); -static void rotate_two_bundles PARAMS ((FILE *)); -static void nop_cycles_until PARAMS ((int, FILE *)); -static void cycle_end_fill_slots PARAMS ((FILE *)); -static int packet_matches_p PARAMS ((const struct ia64_packet *, int, int *)); -static int get_split PARAMS ((const struct ia64_packet *, int)); -static int find_best_insn PARAMS ((rtx *, enum attr_type *, int, - const struct ia64_packet *, int)); -static void find_best_packet PARAMS ((int *, const struct ia64_packet **, - rtx *, enum attr_type *, int)); -static int itanium_reorder PARAMS ((FILE *, rtx *, rtx *, int)); -static void dump_current_packet PARAMS ((FILE *)); -static void schedule_stop PARAMS ((FILE *)); -static rtx gen_nop_type PARAMS ((enum attr_type)); -static void ia64_emit_nops PARAMS ((void)); /* Map a bundle number to its pseudo-op. */ @@ -5467,55 +5550,9 @@ const char * get_bundle_name (b) int b; { - return bundle[b].name; + return bundle_name[b]; } -/* Compute the slot which will cause a split issue in packet P if the - current cycle begins at slot BEGIN. */ - -static int -itanium_split_issue (p, begin) - const struct ia64_packet *p; - int begin; -{ - int type_count[TYPE_S]; - int i; - int split = 6; - - if (begin < 3) - { - /* Always split before and after MMF. 
*/ - if (p->t[0] == TYPE_M && p->t[1] == TYPE_M && p->t[2] == TYPE_F) - return 3; - if (p->t[3] == TYPE_M && p->t[4] == TYPE_M && p->t[5] == TYPE_F) - return 3; - /* Always split after MBB and BBB. */ - if (p->t[1] == TYPE_B) - return 3; - /* Split after first bundle in MIB BBB combination. */ - if (p->t[2] == TYPE_B && p->t[3] == TYPE_B) - return 3; - } - - memset (type_count, 0, sizeof type_count); - for (i = begin; i < split; i++) - { - enum attr_type t0 = p->t[i]; - /* An MLX bundle reserves the same units as an MFI bundle. */ - enum attr_type t = (t0 == TYPE_L ? TYPE_F - : t0 == TYPE_X ? TYPE_I - : t0); - - /* Itanium can execute up to 3 branches, 2 floating point, 2 memory, and - 2 integer per cycle. */ - int max = (t == TYPE_B ? 3 : 2); - if (type_count[t] == max) - return i; - - type_count[t]++; - } - return split; -} /* Return the maximum number of instructions a cpu can issue. */ @@ -5563,208 +5600,21 @@ ia64_adjust_cost (insn, link, dep_insn, cost) rtx insn, link, dep_insn; int cost; { - enum attr_type dep_type; enum attr_itanium_class dep_class; enum attr_itanium_class insn_class; - rtx dep_set, set, src, addr; - - if (GET_CODE (PATTERN (insn)) == CLOBBER - || GET_CODE (PATTERN (insn)) == USE - || GET_CODE (PATTERN (dep_insn)) == CLOBBER - || GET_CODE (PATTERN (dep_insn)) == USE - /* @@@ Not accurate for indirect calls. */ - || GET_CODE (insn) == CALL_INSN - || ia64_safe_type (insn) == TYPE_S) - return 0; - if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT - || REG_NOTE_KIND (link) == REG_DEP_ANTI) - return 0; + if (REG_NOTE_KIND (link) != REG_DEP_OUTPUT) + return cost; - dep_type = ia64_safe_type (dep_insn); - dep_class = ia64_safe_itanium_class (dep_insn); insn_class = ia64_safe_itanium_class (insn); - - /* Compares that feed a conditional branch can execute in the same - cycle. 
*/ - dep_set = ia64_single_set (dep_insn); - set = ia64_single_set (insn); - - if (dep_type != TYPE_F - && dep_set - && GET_CODE (SET_DEST (dep_set)) == REG - && PR_REG (REGNO (SET_DEST (dep_set))) - && GET_CODE (insn) == JUMP_INSN) + dep_class = ia64_safe_itanium_class (dep_insn); + if (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF + || insn_class == ITANIUM_CLASS_ST || insn_class == ITANIUM_CLASS_STF) return 0; - if (dep_set && GET_CODE (SET_DEST (dep_set)) == MEM) - { - /* ??? Can't find any information in the documenation about whether - a sequence - st [rx] = ra - ld rb = [ry] - splits issue. Assume it doesn't. */ - return 0; - } - - src = set ? SET_SRC (set) : 0; - addr = 0; - if (set) - { - if (GET_CODE (SET_DEST (set)) == MEM) - addr = XEXP (SET_DEST (set), 0); - else if (GET_CODE (SET_DEST (set)) == SUBREG - && GET_CODE (SUBREG_REG (SET_DEST (set))) == MEM) - addr = XEXP (SUBREG_REG (SET_DEST (set)), 0); - else - { - addr = src; - if (GET_CODE (addr) == UNSPEC && XVECLEN (addr, 0) > 0) - addr = XVECEXP (addr, 0, 0); - while (GET_CODE (addr) == SUBREG || GET_CODE (addr) == ZERO_EXTEND) - addr = XEXP (addr, 0); - if (GET_CODE (addr) == MEM) - addr = XEXP (addr, 0); - else - addr = 0; - } - } - - if (addr && GET_CODE (addr) == POST_MODIFY) - addr = XEXP (addr, 0); - - set = ia64_single_set (dep_insn); - - if ((dep_class == ITANIUM_CLASS_IALU - || dep_class == ITANIUM_CLASS_ILOG - || dep_class == ITANIUM_CLASS_LD) - && (insn_class == ITANIUM_CLASS_LD - || insn_class == ITANIUM_CLASS_ST)) - { - if (! addr || ! set) - abort (); - /* This isn't completely correct - an IALU that feeds an address has - a latency of 1 cycle if it's issued in an M slot, but 2 cycles - otherwise. Unfortunately there's no good way to describe this. 
*/ - if (reg_overlap_mentioned_p (SET_DEST (set), addr)) - return cost + 1; - } - - if ((dep_class == ITANIUM_CLASS_IALU - || dep_class == ITANIUM_CLASS_ILOG - || dep_class == ITANIUM_CLASS_LD) - && (insn_class == ITANIUM_CLASS_MMMUL - || insn_class == ITANIUM_CLASS_MMSHF - || insn_class == ITANIUM_CLASS_MMSHFI)) - return 3; - - if (dep_class == ITANIUM_CLASS_FMAC - && (insn_class == ITANIUM_CLASS_FMISC - || insn_class == ITANIUM_CLASS_FCVTFX - || insn_class == ITANIUM_CLASS_XMPY)) - return 7; - - if ((dep_class == ITANIUM_CLASS_FMAC - || dep_class == ITANIUM_CLASS_FMISC - || dep_class == ITANIUM_CLASS_FCVTFX - || dep_class == ITANIUM_CLASS_XMPY) - && insn_class == ITANIUM_CLASS_STF) - return 8; - - /* Intel docs say only LD, ST, IALU, ILOG, ISHF consumers have latency 4, - but HP engineers say any non-MM operation. */ - if ((dep_class == ITANIUM_CLASS_MMMUL - || dep_class == ITANIUM_CLASS_MMSHF - || dep_class == ITANIUM_CLASS_MMSHFI) - && insn_class != ITANIUM_CLASS_MMMUL - && insn_class != ITANIUM_CLASS_MMSHF - && insn_class != ITANIUM_CLASS_MMSHFI) - return 4; - return cost; } -/* Describe the current state of the Itanium pipeline. */ -static struct -{ - /* The first slot that is used in the current cycle. */ - int first_slot; - /* The next slot to fill. */ - int cur; - /* The packet we have selected for the current issue window. */ - const struct ia64_packet *packet; - /* The position of the split issue that occurs due to issue width - limitations (6 if there's no split issue). */ - int split; - /* Record data about the insns scheduled so far in the same issue - window. The elements up to but not including FIRST_SLOT belong - to the previous cycle, the ones starting with FIRST_SLOT belong - to the current cycle. */ - enum attr_type types[6]; - rtx insns[6]; - int stopbit[6]; - /* Nonzero if we decided to schedule a stop bit. 
*/ - int last_was_stop; -} sched_data; - -/* Temporary arrays; they have enough elements to hold all insns that - can be ready at the same time while scheduling of the current block. - SCHED_READY can hold ready insns, SCHED_TYPES their types. */ -static rtx *sched_ready; -static enum attr_type *sched_types; - -/* Determine whether an insn INSN of type ITYPE can fit into slot SLOT - of packet P. */ - -static int -insn_matches_slot (p, itype, slot, insn) - const struct ia64_packet *p; - enum attr_type itype; - int slot; - rtx insn; -{ - enum attr_itanium_requires_unit0 u0; - enum attr_type stype = p->t[slot]; - - if (insn) - { - u0 = ia64_safe_itanium_requires_unit0 (insn); - if (u0 == ITANIUM_REQUIRES_UNIT0_YES) - { - int i; - for (i = sched_data.first_slot; i < slot; i++) - if (p->t[i] == stype - || (stype == TYPE_F && p->t[i] == TYPE_L) - || (stype == TYPE_I && p->t[i] == TYPE_X)) - return 0; - } - if (GET_CODE (insn) == CALL_INSN) - { - /* Reject calls in multiway branch packets. We want to limit - the number of multiway branches we generate (since the branch - predictor is limited), and this seems to work fairly well. - (If we didn't do this, we'd have to add another test here to - force calls into the third slot of the bundle.) */ - if (slot < 3) - { - if (p->t[1] == TYPE_B) - return 0; - } - else - { - if (p->t[4] == TYPE_B) - return 0; - } - } - } - - if (itype == stype) - return 1; - if (itype == TYPE_A) - return stype == TYPE_M || stype == TYPE_I; - return 0; -} - /* Like emit_insn_before, but skip cycle_display notes. ??? When cycle display notes are implemented, update this. */ @@ -5775,1055 +5625,1324 @@ ia64_emit_insn_before (insn, before) emit_insn_before (insn, before); } -/* When rotating a bundle out of the issue window, insert a bundle selector - insn in front of it. DUMP is the scheduling dump file or NULL. 
START - is either 0 or 3, depending on whether we want to emit a bundle selector - for the first bundle or the second bundle in the current issue window. - - The selector insns are emitted this late because the selected packet can - be changed until parts of it get rotated out. */ +/* The following function marks insns who produce addresses for load + and store insns. Such insns will be placed into M slots because it + decrease latency time for Itanium1 (see function + `ia64_produce_address_p' and the DFA descriptions). */ static void -finish_last_head (dump, start) - FILE *dump; - int start; +ia64_dependencies_evaluation_hook (head, tail) + rtx head, tail; { - const struct ia64_packet *p = sched_data.packet; - const struct bundle *b = start == 0 ? p->t1 : p->t2; - int bundle_type = b - bundle; - rtx insn; - int i; - - if (! ia64_final_schedule) - return; - - for (i = start; sched_data.insns[i] == 0; i++) - if (i == start + 3) - abort (); - insn = sched_data.insns[i]; + rtx insn, link, next, next_tail; + + next_tail = NEXT_INSN (tail); + for (insn = head; insn != next_tail; insn = NEXT_INSN (insn)) + if (INSN_P (insn)) + insn->call = 0; + for (insn = head; insn != next_tail; insn = NEXT_INSN (insn)) + if (INSN_P (insn) + && ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IALU) + { + for (link = INSN_DEPEND (insn); link != 0; link = XEXP (link, 1)) + { + next = XEXP (link, 0); + if ((ia64_safe_itanium_class (next) == ITANIUM_CLASS_ST + || ia64_safe_itanium_class (next) == ITANIUM_CLASS_STF) + && ia64_st_address_bypass_p (insn, next)) + break; + else if ((ia64_safe_itanium_class (next) == ITANIUM_CLASS_LD + || ia64_safe_itanium_class (next) + == ITANIUM_CLASS_FLD) + && ia64_ld_address_bypass_p (insn, next)) + break; + } + insn->call = link != 0; + } +} - if (dump) - fprintf (dump, "// Emitting template before %d: %s\n", - INSN_UID (insn), b->name); +/* We're beginning a new block. Initialize data structures as necessary. 
*/ - ia64_emit_insn_before (gen_bundle_selector (GEN_INT (bundle_type)), insn); +static void +ia64_sched_init (dump, sched_verbose, max_ready) + FILE *dump ATTRIBUTE_UNUSED; + int sched_verbose ATTRIBUTE_UNUSED; + int max_ready ATTRIBUTE_UNUSED; +{ +#ifdef ENABLE_CHECKING + rtx insn; + + if (reload_completed) + for (insn = NEXT_INSN (current_sched_info->prev_head); + insn != current_sched_info->next_tail; + insn = NEXT_INSN (insn)) + if (SCHED_GROUP_P (insn)) + abort (); +#endif + last_scheduled_insn = NULL_RTX; + init_insn_group_barriers (); } -/* We can't schedule more insns this cycle. Fix up the scheduling state - and advance FIRST_SLOT and CUR. - We have to distribute the insns that are currently found between - FIRST_SLOT and CUR into the slots of the packet we have selected. So - far, they are stored successively in the fields starting at FIRST_SLOT; - now they must be moved to the correct slots. - DUMP is the current scheduling dump file, or NULL. */ +/* We are about to being issuing insns for this clock cycle. + Override the default sort algorithm to better slot instructions. */ -static void -cycle_end_fill_slots (dump) +static int +ia64_dfa_sched_reorder (dump, sched_verbose, ready, pn_ready, + clock_var, reorder_type) FILE *dump; + int sched_verbose; + rtx *ready; + int *pn_ready; + int clock_var ATTRIBUTE_UNUSED; + int reorder_type; { - const struct ia64_packet *packet = sched_data.packet; - int slot, i; - enum attr_type tmp_types[6]; - rtx tmp_insns[6]; + int n_asms; + int n_ready = *pn_ready; + rtx *e_ready = ready + n_ready; + rtx *insnp; - memcpy (tmp_types, sched_data.types, 6 * sizeof (enum attr_type)); - memcpy (tmp_insns, sched_data.insns, 6 * sizeof (rtx)); + if (sched_verbose) + fprintf (dump, "// ia64_dfa_sched_reorder (type %d):\n", reorder_type); - for (i = slot = sched_data.first_slot; i < sched_data.cur; i++) + if (reorder_type == 0) { - enum attr_type t = tmp_types[i]; - if (t != ia64_safe_type (tmp_insns[i])) - abort (); - while (! 
insn_matches_slot (packet, t, slot, tmp_insns[i])) - { - if (slot > sched_data.split) - abort (); - if (dump) - fprintf (dump, "// Packet needs %s, have %s\n", - type_names[packet->t[slot]], type_names[t]); - sched_data.types[slot] = packet->t[slot]; - sched_data.insns[slot] = 0; - sched_data.stopbit[slot] = 0; - - /* ??? TYPE_L instructions always fill up two slots, but we don't - support TYPE_L nops. */ - if (packet->t[slot] == TYPE_L) - abort (); - - slot++; - } - - /* Do _not_ use T here. If T == TYPE_A, then we'd risk changing the - actual slot type later. */ - sched_data.types[slot] = packet->t[slot]; - sched_data.insns[slot] = tmp_insns[i]; - sched_data.stopbit[slot] = 0; - slot++; + /* First, move all USEs, CLOBBERs and other crud out of the way. */ + n_asms = 0; + for (insnp = ready; insnp < e_ready; insnp++) + if (insnp < e_ready) + { + rtx insn = *insnp; + enum attr_type t = ia64_safe_type (insn); + if (t == TYPE_UNKNOWN) + { + if (GET_CODE (PATTERN (insn)) == ASM_INPUT + || asm_noperands (PATTERN (insn)) >= 0) + { + rtx lowest = ready[n_asms]; + ready[n_asms] = insn; + *insnp = lowest; + n_asms++; + } + else + { + rtx highest = ready[n_ready - 1]; + ready[n_ready - 1] = insn; + *insnp = highest; + return 1; + } + } + } - /* TYPE_L instructions always fill up two slots. */ - if (t == TYPE_L) + if (n_asms < n_ready) { - sched_data.types[slot] = packet->t[slot]; - sched_data.insns[slot] = 0; - sched_data.stopbit[slot] = 0; - slot++; + /* Some normal insns to process. Skip the asms. */ + ready += n_asms; + n_ready -= n_asms; } + else if (n_ready > 0) + return 1; } - /* This isn't right - there's no need to pad out until the forced split; - the CPU will automatically split if an insn isn't ready. 
*/ -#if 0 - while (slot < sched_data.split) + if (ia64_final_schedule) { - sched_data.types[slot] = packet->t[slot]; - sched_data.insns[slot] = 0; - sched_data.stopbit[slot] = 0; - slot++; + int deleted = 0; + int nr_need_stop = 0; + + for (insnp = ready; insnp < e_ready; insnp++) + if (safe_group_barrier_needed_p (*insnp)) + nr_need_stop++; + + if (reorder_type == 1 && n_ready == nr_need_stop) + return 0; + if (reorder_type == 0) + return 1; + insnp = e_ready; + /* Move down everything that needs a stop bit, preserving + relative order. */ + while (insnp-- > ready + deleted) + while (insnp >= ready + deleted) + { + rtx insn = *insnp; + if (! safe_group_barrier_needed_p (insn)) + break; + memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx)); + *ready = insn; + deleted++; + } + n_ready -= deleted; + ready += deleted; } -#endif - sched_data.first_slot = sched_data.cur = slot; + return 1; } -/* Bundle rotations, as described in the Itanium optimization manual. - We can rotate either one or both bundles out of the issue window. - DUMP is the current scheduling dump file, or NULL. */ - -static void -rotate_one_bundle (dump) - FILE *dump; -{ - if (dump) - fprintf (dump, "// Rotating one bundle.\n"); - - finish_last_head (dump, 0); - if (sched_data.cur > 3) - { - sched_data.cur -= 3; - sched_data.first_slot -= 3; - memmove (sched_data.types, - sched_data.types + 3, - sched_data.cur * sizeof *sched_data.types); - memmove (sched_data.stopbit, - sched_data.stopbit + 3, - sched_data.cur * sizeof *sched_data.stopbit); - memmove (sched_data.insns, - sched_data.insns + 3, - sched_data.cur * sizeof *sched_data.insns); - sched_data.packet - = &packets[(sched_data.packet->t2 - bundle) * NR_BUNDLES]; - } - else - { - sched_data.cur = 0; - sched_data.first_slot = 0; - } -} +/* We are about to being issuing insns for this clock cycle. Override + the default sort algorithm to better slot instructions. 
*/ -static void -rotate_two_bundles (dump) +static int +ia64_sched_reorder (dump, sched_verbose, ready, pn_ready, clock_var) FILE *dump; + int sched_verbose; + rtx *ready; + int *pn_ready; + int clock_var; { - if (dump) - fprintf (dump, "// Rotating two bundles.\n"); - - if (sched_data.cur == 0) - return; - - finish_last_head (dump, 0); - if (sched_data.cur > 3) - finish_last_head (dump, 3); - sched_data.cur = 0; - sched_data.first_slot = 0; + return ia64_dfa_sched_reorder (dump, sched_verbose, ready, + pn_ready, clock_var, 0); } -/* We're beginning a new block. Initialize data structures as necessary. */ +/* Like ia64_sched_reorder, but called after issuing each insn. + Override the default sort algorithm to better slot instructions. */ -static void -ia64_sched_init (dump, sched_verbose, max_ready) +static int +ia64_sched_reorder2 (dump, sched_verbose, ready, pn_ready, clock_var) FILE *dump ATTRIBUTE_UNUSED; int sched_verbose ATTRIBUTE_UNUSED; - int max_ready; + rtx *ready; + int *pn_ready; + int clock_var; { - static int initialized = 0; - - if (! 
initialized) - { - int b1, b2, i; - - initialized = 1; - - for (i = b1 = 0; b1 < NR_BUNDLES; b1++) - { - const struct bundle *t1 = bundle + b1; - for (b2 = 0; b2 < NR_BUNDLES; b2++, i++) - { - const struct bundle *t2 = bundle + b2; - - packets[i].t1 = t1; - packets[i].t2 = t2; - } - } - for (i = 0; i < NR_PACKETS; i++) - { - int j; - for (j = 0; j < 3; j++) - packets[i].t[j] = packets[i].t1->t[j]; - for (j = 0; j < 3; j++) - packets[i].t[j + 3] = packets[i].t2->t[j]; - packets[i].first_split = itanium_split_issue (packets + i, 0); - } - - } - - init_insn_group_barriers (); - - memset (&sched_data, 0, sizeof sched_data); - sched_types = (enum attr_type *) xmalloc (max_ready - * sizeof (enum attr_type)); - sched_ready = (rtx *) xmalloc (max_ready * sizeof (rtx)); + if (ia64_tune == PROCESSOR_ITANIUM && reload_completed && last_scheduled_insn) + clocks [INSN_UID (last_scheduled_insn)] = clock_var; + return ia64_dfa_sched_reorder (dump, sched_verbose, ready, pn_ready, + clock_var, 1); } -/* See if the packet P can match the insns we have already scheduled. Return - nonzero if so. In *PSLOT, we store the first slot that is available for - more instructions if we choose this packet. - SPLIT holds the last slot we can use, there's a split issue after it so - scheduling beyond it would cause us to use more than one cycle. */ +/* We are about to issue INSN. Return the number of insns left on the + ready queue that can be issued this cycle. */ static int -packet_matches_p (p, split, pslot) - const struct ia64_packet *p; - int split; - int *pslot; -{ - int filled = sched_data.cur; - int first = sched_data.first_slot; - int i, slot; - - /* First, check if the first of the two bundles must be a specific one (due - to stop bits). */ - if (first > 0 && sched_data.stopbit[0] && p->t1->possible_stop != 1) - return 0; - if (first > 1 && sched_data.stopbit[1] && p->t1->possible_stop != 2) - return 0; - - for (i = 0; i < first; i++) - if (! 
insn_matches_slot (p, sched_data.types[i], i, - sched_data.insns[i])) - return 0; - for (i = slot = first; i < filled; i++) +ia64_variable_issue (dump, sched_verbose, insn, can_issue_more) + FILE *dump ATTRIBUTE_UNUSED; + int sched_verbose ATTRIBUTE_UNUSED; + rtx insn ATTRIBUTE_UNUSED; + int can_issue_more ATTRIBUTE_UNUSED; +{ + last_scheduled_insn = insn; + memcpy (prev_cycle_state, curr_state, dfa_state_size); + if (reload_completed) { - while (slot < split) - { - if (insn_matches_slot (p, sched_data.types[i], slot, - sched_data.insns[i])) - break; - slot++; - } - if (slot == split) - return 0; - slot++; + if (group_barrier_needed_p (insn)) + abort (); + if (GET_CODE (insn) == CALL_INSN) + init_insn_group_barriers (); + stops_p [INSN_UID (insn)] = stop_before_p; + stop_before_p = 0; } - - if (pslot) - *pslot = slot; return 1; } -/* A frontend for itanium_split_issue. For a packet P and a slot - number FIRST that describes the start of the current clock cycle, - return the slot number of the first split issue. This function - uses the cached number found in P if possible. */ +/* We are choosing insn from the ready queue. Return nonzero if INSN + can be chosen. */ static int -get_split (p, first) - const struct ia64_packet *p; - int first; +ia64_first_cycle_multipass_dfa_lookahead_guard (insn) + rtx insn; { - if (first == 0) - return p->first_split; - return itanium_split_issue (p, first); + if (insn == NULL_RTX || !INSN_P (insn)) + abort (); + return (!reload_completed + || !safe_group_barrier_needed_p (insn)); } -/* Given N_READY insns in the array READY, whose types are found in the - corresponding array TYPES, return the insn that is best suited to be - scheduled in slot SLOT of packet P. */ +/* The following variable value is pseudo-insn used by the DFA insn + scheduler to change the DFA state when the simulated clock is + increased. */ + +static rtx dfa_pre_cycle_insn; + +/* We are about to being issuing INSN. 
Return nonzero if we can not + issue it on given cycle CLOCK and return zero if we should not sort + the ready queue on the next clock start. */ static int -find_best_insn (ready, types, n_ready, p, slot) - rtx *ready; - enum attr_type *types; - int n_ready; - const struct ia64_packet *p; - int slot; +ia64_dfa_new_cycle (dump, verbose, insn, last_clock, clock, sort_p) + FILE *dump; + int verbose; + rtx insn; + int last_clock, clock; + int *sort_p; { - int best = -1; - int best_pri = 0; - while (n_ready-- > 0) + int setup_clocks_p = FALSE; + + if (insn == NULL_RTX || !INSN_P (insn)) + abort (); + if ((reload_completed && safe_group_barrier_needed_p (insn)) + || (last_scheduled_insn + && (GET_CODE (last_scheduled_insn) == CALL_INSN + || GET_CODE (PATTERN (last_scheduled_insn)) == ASM_INPUT + || asm_noperands (PATTERN (last_scheduled_insn)) >= 0))) { - rtx insn = ready[n_ready]; - if (! insn) - continue; - if (best >= 0 && INSN_PRIORITY (ready[n_ready]) < best_pri) - break; - /* If we have equally good insns, one of which has a stricter - slot requirement, prefer the one with the stricter requirement. */ - if (best >= 0 && types[n_ready] == TYPE_A) - continue; - if (insn_matches_slot (p, types[n_ready], slot, insn)) + init_insn_group_barriers (); + if (verbose && dump) + fprintf (dump, "// Stop should be before %d%s\n", INSN_UID (insn), + last_clock == clock ? " + cycle advance" : ""); + stop_before_p = 1; + if (last_clock == clock) { - best = n_ready; - best_pri = INSN_PRIORITY (ready[best]); - - /* If there's no way we could get a stricter requirement, stop - looking now. 
*/ - if (types[n_ready] != TYPE_A - && ia64_safe_itanium_requires_unit0 (ready[n_ready])) - break; - break; + state_transition (curr_state, dfa_stop_insn); + if (TARGET_EARLY_STOP_BITS) + *sort_p = (last_scheduled_insn == NULL_RTX + || GET_CODE (last_scheduled_insn) != CALL_INSN); + else + *sort_p = 0; + return 1; + } + else if (reload_completed) + setup_clocks_p = TRUE; + memcpy (curr_state, prev_cycle_state, dfa_state_size); + state_transition (curr_state, dfa_stop_insn); + state_transition (curr_state, dfa_pre_cycle_insn); + state_transition (curr_state, NULL); + } + else if (reload_completed) + setup_clocks_p = TRUE; + if (setup_clocks_p && ia64_tune == PROCESSOR_ITANIUM) + { + enum attr_itanium_class c = ia64_safe_itanium_class (insn); + + if (c != ITANIUM_CLASS_MMMUL && c != ITANIUM_CLASS_MMSHF) + { + rtx link; + int d = -1; + + for (link = LOG_LINKS (insn); link; link = XEXP (link, 1)) + if (REG_NOTE_KIND (link) == 0) + { + enum attr_itanium_class dep_class; + rtx dep_insn = XEXP (link, 0); + + dep_class = ia64_safe_itanium_class (dep_insn); + if ((dep_class == ITANIUM_CLASS_MMMUL + || dep_class == ITANIUM_CLASS_MMSHF) + && last_clock - clocks [INSN_UID (dep_insn)] < 4 + && (d < 0 + || last_clock - clocks [INSN_UID (dep_insn)] < d)) + d = last_clock - clocks [INSN_UID (dep_insn)]; + } + if (d >= 0) + add_cycles [INSN_UID (insn)] = 3 - d; } } - return best; + return 0; } -/* Select the best packet to use given the current scheduler state and the - current ready list. - READY is an array holding N_READY ready insns; TYPES is a corresponding - array that holds their types. Store the best packet in *PPACKET and the - number of insns that can be scheduled in the current cycle in *PBEST. 
*/ + -static void -find_best_packet (pbest, ppacket, ready, types, n_ready) - int *pbest; - const struct ia64_packet **ppacket; - rtx *ready; - enum attr_type *types; - int n_ready; -{ - int first = sched_data.first_slot; - int best = 0; - int lowest_end = 6; - const struct ia64_packet *best_packet = NULL; - int i; +/* The following page contains abstract data `bundle states' which are + used for bundling insns (inserting nops and template generation). */ + +/* The following describes state of insn bundling. */ + +struct bundle_state +{ + /* Unique bundle state number to identify them in the debugging + output */ + int unique_num; + rtx insn; /* corresponding insn, NULL for the 1st and the last state */ + /* number nops before and after the insn */ + short before_nops_num, after_nops_num; + int insn_num; /* insn number (0 - for initial state, 1 - for the 1st + insn */ + int cost; /* cost of the state in cycles */ + int accumulated_insns_num; /* number of all previous insns including + nops. L is considered as 2 insns */ + int branch_deviation; /* deviation of previous branches from 3rd slots */ + struct bundle_state *next; /* next state with the same insn_num */ + struct bundle_state *originator; /* originator (previous insn state) */ + /* All bundle states are in the following chain. */ + struct bundle_state *allocated_states_chain; + /* The DFA State after issuing the insn and the nops. */ + state_t dfa_state; +}; - for (i = 0; i < NR_PACKETS; i++) - { - const struct ia64_packet *p = packets + i; - int slot; - int split = get_split (p, first); - int win = 0; - int first_slot, last_slot; - int b_nops = 0; +/* The following is map insn number to the corresponding bundle state. */ - if (! packet_matches_p (p, split, &first_slot)) - continue; +static struct bundle_state **index_to_bundle_states; - memcpy (sched_ready, ready, n_ready * sizeof (rtx)); +/* The unique number of next bundle state. 
*/ - win = 0; - last_slot = 6; - for (slot = first_slot; slot < split; slot++) - { - int insn_nr; +static int bundle_states_num; - /* Disallow a degenerate case where the first bundle doesn't - contain anything but NOPs! */ - if (first_slot == 0 && win == 0 && slot == 3) - { - win = -1; - break; - } +/* All allocated bundle states are in the following chain. */ - insn_nr = find_best_insn (sched_ready, types, n_ready, p, slot); - if (insn_nr >= 0) - { - sched_ready[insn_nr] = 0; - last_slot = slot; - win++; - } - else if (p->t[slot] == TYPE_B) - b_nops++; - } - /* We must disallow MBB/BBB packets if any of their B slots would be - filled with nops. */ - if (last_slot < 3) - { - if (p->t[1] == TYPE_B && (b_nops || last_slot < 2)) - win = -1; - } - else - { - if (p->t[4] == TYPE_B && (b_nops || last_slot < 5)) - win = -1; - } +static struct bundle_state *allocated_bundle_states_chain; - if (win > best - || (win == best && last_slot < lowest_end)) - { - best = win; - lowest_end = last_slot; - best_packet = p; - } - } - *pbest = best; - *ppacket = best_packet; -} +/* All allocated but not used bundle states are in the following + chain. */ -/* Reorder the ready list so that the insns that can be issued in this cycle - are found in the correct order at the end of the list. - DUMP is the scheduling dump file, or NULL. READY points to the start, - E_READY to the end of the ready list. MAY_FAIL determines what should be - done if no insns can be scheduled in this cycle: if it is zero, we abort, - otherwise we return 0. - Return 1 if any insns can be scheduled in this cycle. 
*/ +static struct bundle_state *free_bundle_state_chain; -static int -itanium_reorder (dump, ready, e_ready, may_fail) - FILE *dump; - rtx *ready; - rtx *e_ready; - int may_fail; -{ - const struct ia64_packet *best_packet; - int n_ready = e_ready - ready; - int first = sched_data.first_slot; - int i, best, best_split, filled; - for (i = 0; i < n_ready; i++) - sched_types[i] = ia64_safe_type (ready[i]); +/* The following function returns a free bundle state. */ - find_best_packet (&best, &best_packet, ready, sched_types, n_ready); +static struct bundle_state * +get_free_bundle_state () +{ + struct bundle_state *result; - if (best == 0) + if (free_bundle_state_chain != NULL) { - if (may_fail) - return 0; - abort (); + result = free_bundle_state_chain; + free_bundle_state_chain = result->next; } - - if (dump) + else { - fprintf (dump, "// Selected bundles: %s %s (%d insns)\n", - best_packet->t1->name, - best_packet->t2 ? best_packet->t2->name : NULL, best); + result = xmalloc (sizeof (struct bundle_state)); + result->dfa_state = xmalloc (dfa_state_size); + result->allocated_states_chain = allocated_bundle_states_chain; + allocated_bundle_states_chain = result; } + result->unique_num = bundle_states_num++; + return result; + +} - best_split = itanium_split_issue (best_packet, first); - packet_matches_p (best_packet, best_split, &filled); +/* The following function frees given bundle state. 
*/ - for (i = filled; i < best_split; i++) - { - int insn_nr; +static void +free_bundle_state (state) + struct bundle_state *state; +{ + state->next = free_bundle_state_chain; + free_bundle_state_chain = state; +} - insn_nr = find_best_insn (ready, sched_types, n_ready, best_packet, i); - if (insn_nr >= 0) - { - rtx insn = ready[insn_nr]; - memmove (ready + insn_nr, ready + insn_nr + 1, - (n_ready - insn_nr - 1) * sizeof (rtx)); - memmove (sched_types + insn_nr, sched_types + insn_nr + 1, - (n_ready - insn_nr - 1) * sizeof (enum attr_type)); - ready[--n_ready] = insn; - } - } +/* Start work with abstract data `bundle states'. */ - sched_data.packet = best_packet; - sched_data.split = best_split; - return 1; +static void +initiate_bundle_states () +{ + bundle_states_num = 0; + free_bundle_state_chain = NULL; + allocated_bundle_states_chain = NULL; } -/* Dump information about the current scheduling state to file DUMP. */ +/* Finish work with abstract data `bundle states'. */ static void -dump_current_packet (dump) - FILE *dump; +finish_bundle_states () { - int i; - fprintf (dump, "// %d slots filled:", sched_data.cur); - for (i = 0; i < sched_data.first_slot; i++) - { - rtx insn = sched_data.insns[i]; - fprintf (dump, " %s", type_names[sched_data.types[i]]); - if (insn) - fprintf (dump, "/%s", type_names[ia64_safe_type (insn)]); - if (sched_data.stopbit[i]) - fprintf (dump, " ;;"); - } - fprintf (dump, " :::"); - for (i = sched_data.first_slot; i < sched_data.cur; i++) + struct bundle_state *curr_state, *next_state; + + for (curr_state = allocated_bundle_states_chain; + curr_state != NULL; + curr_state = next_state) { - rtx insn = sched_data.insns[i]; - enum attr_type t = ia64_safe_type (insn); - fprintf (dump, " (%d) %s", INSN_UID (insn), type_names[t]); + next_state = curr_state->allocated_states_chain; + free (curr_state->dfa_state); + free (curr_state); } - fprintf (dump, "\n"); } -/* Schedule a stop bit. DUMP is the current scheduling dump file, or - NULL. 
*/ +/* Hash table of the bundle states. The key is dfa_state and insn_num + of the bundle states. */ -static void -schedule_stop (dump) - FILE *dump; -{ - const struct ia64_packet *best = sched_data.packet; - int i; - int best_stop = 6; +static htab_t bundle_state_table; - if (dump) - fprintf (dump, "// Stop bit, cur = %d.\n", sched_data.cur); +/* The function returns hash of BUNDLE_STATE. */ - if (sched_data.cur == 0) - { - if (dump) - fprintf (dump, "// At start of bundle, so nothing to do.\n"); - - rotate_two_bundles (NULL); - return; - } +static unsigned +bundle_state_hash (bundle_state) + const void *bundle_state; +{ + const struct bundle_state *state = (struct bundle_state *) bundle_state; + unsigned result, i; - for (i = -1; i < NR_PACKETS; i++) - { - /* This is a slight hack to give the current packet the first chance. - This is done to avoid e.g. switching from MIB to MBB bundles. */ - const struct ia64_packet *p = (i >= 0 ? packets + i : sched_data.packet); - int split = get_split (p, sched_data.first_slot); - const struct bundle *compare; - int next, stoppos; + for (result = i = 0; i < dfa_state_size; i++) + result += (((unsigned char *) state->dfa_state) [i] + << ((i % CHAR_BIT) * 3 + CHAR_BIT)); + return result + state->insn_num; +} - if (! packet_matches_p (p, split, &next)) - continue; +/* The function returns nonzero if the bundle state keys are equal. */ - compare = next > 3 ? 
p->t2 : p->t1; +static int +bundle_state_eq_p (bundle_state_1, bundle_state_2) + const void *bundle_state_1; + const void *bundle_state_2; +{ + const struct bundle_state * state1 = (struct bundle_state *) bundle_state_1; + const struct bundle_state * state2 = (struct bundle_state *) bundle_state_2; - stoppos = 3; - if (compare->possible_stop) - stoppos = compare->possible_stop; - if (next > 3) - stoppos += 3; + return (state1->insn_num == state2->insn_num + && memcmp (state1->dfa_state, state2->dfa_state, + dfa_state_size) == 0); +} - if (stoppos < next || stoppos >= best_stop) - { - if (compare->possible_stop == 0) - continue; - stoppos = (next > 3 ? 6 : 3); - } - if (stoppos < next || stoppos >= best_stop) - continue; +/* The function inserts the BUNDLE_STATE into the hash table. The + function returns nonzero if the bundle has been inserted into the + table. The table contains the best bundle state with given key. */ - if (dump) - fprintf (dump, "// switching from %s %s to %s %s (stop at %d)\n", - best->t1->name, best->t2->name, p->t1->name, p->t2->name, - stoppos); +static int +insert_bundle_state (bundle_state) + struct bundle_state *bundle_state; +{ + void **entry_ptr; - best_stop = stoppos; - best = p; + entry_ptr = htab_find_slot (bundle_state_table, bundle_state, 1); + if (*entry_ptr == NULL) + { + bundle_state->next = index_to_bundle_states [bundle_state->insn_num]; + index_to_bundle_states [bundle_state->insn_num] = bundle_state; + *entry_ptr = (void *) bundle_state; + return TRUE; } - - sched_data.packet = best; - cycle_end_fill_slots (dump); - while (sched_data.cur < best_stop) + else if (bundle_state->cost < ((struct bundle_state *) *entry_ptr)->cost + || (bundle_state->cost == ((struct bundle_state *) *entry_ptr)->cost + && (((struct bundle_state *)*entry_ptr)->accumulated_insns_num + > bundle_state->accumulated_insns_num + || (((struct bundle_state *) + *entry_ptr)->accumulated_insns_num + == bundle_state->accumulated_insns_num + && ((struct 
bundle_state *) + *entry_ptr)->branch_deviation + > bundle_state->branch_deviation)))) + { - sched_data.types[sched_data.cur] = best->t[sched_data.cur]; - sched_data.insns[sched_data.cur] = 0; - sched_data.stopbit[sched_data.cur] = 0; - sched_data.cur++; + struct bundle_state temp; + + temp = *(struct bundle_state *) *entry_ptr; + *(struct bundle_state *) *entry_ptr = *bundle_state; + ((struct bundle_state *) *entry_ptr)->next = temp.next; + *bundle_state = temp; } - sched_data.stopbit[sched_data.cur - 1] = 1; - sched_data.first_slot = best_stop; + return FALSE; +} + +/* Start work with the hash table. */ - if (dump) - dump_current_packet (dump); +static void +initiate_bundle_state_table () +{ + bundle_state_table = htab_create (50, bundle_state_hash, bundle_state_eq_p, + (htab_del) 0); } -/* If necessary, perform one or two rotations on the scheduling state. - This should only be called if we are starting a new cycle. */ +/* Finish work with the hash table. */ static void -maybe_rotate (dump) - FILE *dump; +finish_bundle_state_table () { - cycle_end_fill_slots (dump); - if (sched_data.cur == 6) - rotate_two_bundles (dump); - else if (sched_data.cur >= 3) - rotate_one_bundle (dump); - sched_data.first_slot = sched_data.cur; + htab_delete (bundle_state_table); } -/* The clock cycle when ia64_sched_reorder was last called. */ -static int prev_cycle; + -/* The first insn scheduled in the previous cycle. This is the saved - value of sched_data.first_slot. */ -static int prev_first; +/* The following variable is a insn `nop' used to check bundle states + with different number of inserted nops. */ -/* Emit NOPs to fill the delay between PREV_CYCLE and CLOCK_VAR. Used to - pad out the delay between MM (shifts, etc.) and integer operations. */ +static rtx ia64_nop; -static void -nop_cycles_until (clock_var, dump) - int clock_var; - FILE *dump; +/* The following function tries to issue NOPS_NUM nops for the current + state without advancing processor cycle. 
If it failed, the + function returns FALSE and frees the current state. */ + +static int +try_issue_nops (curr_state, nops_num) + struct bundle_state *curr_state; + int nops_num; { - int prev_clock = prev_cycle; - int cycles_left = clock_var - prev_clock; - bool did_stop = false; + int i; - /* Finish the previous cycle; pad it out with NOPs. */ - if (sched_data.cur == 3) - { - sched_emit_insn (gen_insn_group_barrier (GEN_INT (3))); - did_stop = true; - maybe_rotate (dump); - } - else if (sched_data.cur > 0) - { - int need_stop = 0; - int split = itanium_split_issue (sched_data.packet, prev_first); + for (i = 0; i < nops_num; i++) + if (state_transition (curr_state->dfa_state, ia64_nop) >= 0) + { + free_bundle_state (curr_state); + return FALSE; + } + return TRUE; +} - if (sched_data.cur < 3 && split > 3) - { - split = 3; - need_stop = 1; - } +/* The following function tries to issue INSN for the current + state without advancing processor cycle. If it failed, the + function returns FALSE and frees the current state. */ - if (split > sched_data.cur) - { - int i; - for (i = sched_data.cur; i < split; i++) - { - rtx t = sched_emit_insn (gen_nop_type (sched_data.packet->t[i])); - sched_data.types[i] = sched_data.packet->t[i]; - sched_data.insns[i] = t; - sched_data.stopbit[i] = 0; - } - sched_data.cur = split; - } +static int +try_issue_insn (curr_state, insn) + struct bundle_state *curr_state; + rtx insn; +{ + if (insn && state_transition (curr_state->dfa_state, insn) >= 0) + { + free_bundle_state (curr_state); + return FALSE; + } + return TRUE; +} - if (! 
need_stop && sched_data.cur > 0 && sched_data.cur < 6 - && cycles_left > 1) - { - int i; - for (i = sched_data.cur; i < 6; i++) - { - rtx t = sched_emit_insn (gen_nop_type (sched_data.packet->t[i])); - sched_data.types[i] = sched_data.packet->t[i]; - sched_data.insns[i] = t; - sched_data.stopbit[i] = 0; - } - sched_data.cur = 6; - cycles_left--; - need_stop = 1; - } +/* The following function tries to issue BEFORE_NOPS_NUM nops and INSN + starting with ORIGINATOR without advancing processor cycle. If + TRY_BUNDLE_END_P is TRUE, the function also tries to issue nops to + fill all bundle. If it was successful, the function creates new + bundle state and insert into the hash table and into + `index_to_bundle_states'. */ - if (need_stop || sched_data.cur == 6) +static void +issue_nops_and_insn (originator, before_nops_num, insn, try_bundle_end_p) + struct bundle_state *originator; + int before_nops_num; + rtx insn; + int try_bundle_end_p; +{ + struct bundle_state *curr_state; + + curr_state = get_free_bundle_state (); + memcpy (curr_state->dfa_state, originator->dfa_state, dfa_state_size); + curr_state->insn = insn; + curr_state->insn_num = originator->insn_num + 1; + curr_state->cost = originator->cost; + curr_state->originator = originator; + curr_state->before_nops_num = before_nops_num; + curr_state->after_nops_num = 0; + curr_state->accumulated_insns_num + = originator->accumulated_insns_num + before_nops_num; + curr_state->branch_deviation = originator->branch_deviation; + if (insn == NULL_RTX) + abort (); + else if (INSN_CODE (insn) == CODE_FOR_insn_group_barrier) + { + if (GET_MODE (insn) == TImode) + abort (); + if (!try_issue_nops (curr_state, before_nops_num)) + return; + if (!try_issue_insn (curr_state, insn)) + return; + memcpy (temp_dfa_state, curr_state->dfa_state, dfa_state_size); + if (state_transition (temp_dfa_state, dfa_pre_cycle_insn) >= 0 + && curr_state->accumulated_insns_num % 3 != 0) { - sched_emit_insn (gen_insn_group_barrier (GEN_INT (3))); - 
did_stop = true; + free_bundle_state (curr_state); + return; } - maybe_rotate (dump); } - - cycles_left--; - while (cycles_left > 0) + else if (GET_MODE (insn) != TImode) { - sched_emit_insn (gen_bundle_selector (GEN_INT (0))); - sched_emit_insn (gen_nop_type (TYPE_M)); - sched_emit_insn (gen_nop_type (TYPE_I)); - if (cycles_left > 1) + if (!try_issue_nops (curr_state, before_nops_num)) + return; + if (!try_issue_insn (curr_state, insn)) + return; + if (GET_CODE (PATTERN (insn)) != ASM_INPUT + && asm_noperands (PATTERN (insn)) < 0) + curr_state->accumulated_insns_num++; + if (ia64_safe_type (insn) == TYPE_L) + curr_state->accumulated_insns_num++; + } + else + { + state_transition (curr_state->dfa_state, dfa_pre_cycle_insn); + state_transition (curr_state->dfa_state, NULL); + curr_state->cost++; + if (!try_issue_nops (curr_state, before_nops_num)) + return; + if (!try_issue_insn (curr_state, insn)) + return; + if (GET_CODE (PATTERN (insn)) != ASM_INPUT + && asm_noperands (PATTERN (insn)) < 0) + curr_state->accumulated_insns_num++; + if (ia64_safe_type (insn) == TYPE_L) + curr_state->accumulated_insns_num++; + } + if (ia64_safe_type (insn) == TYPE_B) + curr_state->branch_deviation + += 2 - (curr_state->accumulated_insns_num - 1) % 3; + if (try_bundle_end_p && curr_state->accumulated_insns_num % 3 != 0) + { + if (insert_bundle_state (curr_state)) { - sched_emit_insn (gen_insn_group_barrier (GEN_INT (2))); - cycles_left--; + state_t dfa_state; + struct bundle_state *curr_state1; + struct bundle_state *allocated_states_chain; + + curr_state1 = get_free_bundle_state (); + dfa_state = curr_state1->dfa_state; + allocated_states_chain = curr_state1->allocated_states_chain; + *curr_state1 = *curr_state; + curr_state1->dfa_state = dfa_state; + curr_state1->allocated_states_chain = allocated_states_chain; + memcpy (curr_state1->dfa_state, curr_state->dfa_state, + dfa_state_size); + curr_state = curr_state1; } - sched_emit_insn (gen_nop_type (TYPE_I)); - sched_emit_insn 
(gen_insn_group_barrier (GEN_INT (3))); - did_stop = true; - cycles_left--; + if (!try_issue_nops (curr_state, + 3 - curr_state->accumulated_insns_num % 3)) + return; + curr_state->after_nops_num + = 3 - curr_state->accumulated_insns_num % 3; + curr_state->accumulated_insns_num + += 3 - curr_state->accumulated_insns_num % 3; } + if (!insert_bundle_state (curr_state)) + free_bundle_state (curr_state); + return; +} + +/* The following function returns position in the two window bundle + for given STATE. */ - if (did_stop) - init_insn_group_barriers (); +static int +get_max_pos (state) + state_t state; +{ + if (cpu_unit_reservation_p (state, pos_6)) + return 6; + else if (cpu_unit_reservation_p (state, pos_5)) + return 5; + else if (cpu_unit_reservation_p (state, pos_4)) + return 4; + else if (cpu_unit_reservation_p (state, pos_3)) + return 3; + else if (cpu_unit_reservation_p (state, pos_2)) + return 2; + else if (cpu_unit_reservation_p (state, pos_1)) + return 1; + else + return 0; } -/* We are about to being issuing insns for this clock cycle. - Override the default sort algorithm to better slot instructions. */ +/* The function returns code of a possible template for given position + and state. The function should be called only with 2 values of + position equal to 3 or 6. 
*/ static int -ia64_internal_sched_reorder (dump, sched_verbose, ready, pn_ready, - reorder_type, clock_var) - FILE *dump ATTRIBUTE_UNUSED; - int sched_verbose ATTRIBUTE_UNUSED; - rtx *ready; - int *pn_ready; - int reorder_type, clock_var; +get_template (state, pos) + state_t state; + int pos; { - int n_asms; - int n_ready = *pn_ready; - rtx *e_ready = ready + n_ready; - rtx *insnp; - - if (sched_verbose) + switch (pos) { - fprintf (dump, "// ia64_sched_reorder (type %d):\n", reorder_type); - dump_current_packet (dump); + case 3: + if (cpu_unit_reservation_p (state, _0mii_)) + return 0; + else if (cpu_unit_reservation_p (state, _0mmi_)) + return 1; + else if (cpu_unit_reservation_p (state, _0mfi_)) + return 2; + else if (cpu_unit_reservation_p (state, _0mmf_)) + return 3; + else if (cpu_unit_reservation_p (state, _0bbb_)) + return 4; + else if (cpu_unit_reservation_p (state, _0mbb_)) + return 5; + else if (cpu_unit_reservation_p (state, _0mib_)) + return 6; + else if (cpu_unit_reservation_p (state, _0mmb_)) + return 7; + else if (cpu_unit_reservation_p (state, _0mfb_)) + return 8; + else if (cpu_unit_reservation_p (state, _0mlx_)) + return 9; + else + abort (); + case 6: + if (cpu_unit_reservation_p (state, _1mii_)) + return 0; + else if (cpu_unit_reservation_p (state, _1mmi_)) + return 1; + else if (cpu_unit_reservation_p (state, _1mfi_)) + return 2; + else if (_1mmf_ >= 0 && cpu_unit_reservation_p (state, _1mmf_)) + return 3; + else if (cpu_unit_reservation_p (state, _1bbb_)) + return 4; + else if (cpu_unit_reservation_p (state, _1mbb_)) + return 5; + else if (cpu_unit_reservation_p (state, _1mib_)) + return 6; + else if (cpu_unit_reservation_p (state, _1mmb_)) + return 7; + else if (cpu_unit_reservation_p (state, _1mfb_)) + return 8; + else if (cpu_unit_reservation_p (state, _1mlx_)) + return 9; + else + abort (); + default: + abort (); } +} - /* Work around the pipeline flush that will occurr if the results of - an MM instruction are accessed before the result 
is ready. Intel - documentation says this only happens with IALU, ISHF, ILOG, LD, - and ST consumers, but experimental evidence shows that *any* non-MM - type instruction will incurr the flush. */ - if (reorder_type == 0 && clock_var > 0 && ia64_final_schedule) - { - for (insnp = ready; insnp < e_ready; insnp++) - { - rtx insn = *insnp, link; - enum attr_itanium_class t = ia64_safe_itanium_class (insn); - - if (t == ITANIUM_CLASS_MMMUL - || t == ITANIUM_CLASS_MMSHF - || t == ITANIUM_CLASS_MMSHFI) - continue; +/* The following function returns an insn important for insn bundling + followed by INSN and before TAIL. */ - for (link = LOG_LINKS (insn); link; link = XEXP (link, 1)) - if (REG_NOTE_KIND (link) == 0) - { - rtx other = XEXP (link, 0); - enum attr_itanium_class t0 = ia64_safe_itanium_class (other); - if (t0 == ITANIUM_CLASS_MMSHF || t0 == ITANIUM_CLASS_MMMUL) - { - nop_cycles_until (clock_var, sched_verbose ? dump : NULL); - goto out; - } - } - } - } - out: +static rtx +get_next_important_insn (insn, tail) + rtx insn, tail; +{ + for (; insn && insn != tail; insn = NEXT_INSN (insn)) + if (INSN_P (insn) + && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE + && GET_CODE (PATTERN (insn)) != USE + && GET_CODE (PATTERN (insn)) != CLOBBER) + return insn; + return NULL_RTX; +} - prev_first = sched_data.first_slot; - prev_cycle = clock_var; +/* The following function does insn bundling. Bundling algorithm is + based on dynamic programming. It tries to insert different number of + nop insns before/after the real insns. At the end of EBB, it chooses the + best alternative and then, moving back in EBB, inserts templates for + the best alternative. The algorithm is directed by information + (changes of simulated processor cycle) created by the 2nd insn + scheduling. */ - if (reorder_type == 0) - maybe_rotate (sched_verbose ? 
dump : NULL); +static void +bundling (dump, verbose, prev_head_insn, tail) + FILE *dump; + int verbose; + rtx prev_head_insn, tail; +{ + struct bundle_state *curr_state, *next_state, *best_state; + rtx insn, next_insn; + int insn_num; + int i, bundle_end_p; + int pos, max_pos, template0, template1; + rtx b; + rtx nop; + enum attr_type type; - /* First, move all USEs, CLOBBERs and other crud out of the way. */ - n_asms = 0; - for (insnp = ready; insnp < e_ready; insnp++) - if (insnp < e_ready) + insn_num = 0; + for (insn = NEXT_INSN (prev_head_insn); + insn && insn != tail; + insn = NEXT_INSN (insn)) + if (INSN_P (insn)) + insn_num++; + if (insn_num == 0) + return; + bundling_p = 1; + dfa_clean_insn_cache (); + initiate_bundle_state_table (); + index_to_bundle_states = xmalloc ((insn_num + 2) + * sizeof (struct bundle_state *)); + /* First (forward) pass -- generates states. */ + curr_state = get_free_bundle_state (); + curr_state->insn = NULL; + curr_state->before_nops_num = 0; + curr_state->after_nops_num = 0; + curr_state->insn_num = 0; + curr_state->cost = 0; + curr_state->accumulated_insns_num = 0; + curr_state->branch_deviation = 0; + curr_state->next = NULL; + curr_state->originator = NULL; + state_reset (curr_state->dfa_state); + index_to_bundle_states [0] = curr_state; + insn_num = 0; + for (insn = NEXT_INSN (prev_head_insn); + insn != tail; + insn = NEXT_INSN (insn)) + if (INSN_P (insn) + && (ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IGNORE + || GET_CODE (PATTERN (insn)) == USE + || GET_CODE (PATTERN (insn)) == CLOBBER) + && GET_MODE (insn) == TImode) { - rtx insn = *insnp; - enum attr_type t = ia64_safe_type (insn); - if (t == TYPE_UNKNOWN) - { - if (GET_CODE (PATTERN (insn)) == ASM_INPUT - || asm_noperands (PATTERN (insn)) >= 0) - { - rtx lowest = ready[n_asms]; - ready[n_asms] = insn; - *insnp = lowest; - n_asms++; - } - else - { - rtx highest = ready[n_ready - 1]; - ready[n_ready - 1] = insn; - *insnp = highest; - if (ia64_final_schedule && 
group_barrier_needed_p (insn)) - { - schedule_stop (sched_verbose ? dump : NULL); - sched_data.last_was_stop = 1; - maybe_rotate (sched_verbose ? dump : NULL); - } - - return 1; - } - } + PUT_MODE (insn, VOIDmode); + for (next_insn = NEXT_INSN (insn); + next_insn != tail; + next_insn = NEXT_INSN (next_insn)) + if (INSN_P (next_insn) + && ia64_safe_itanium_class (next_insn) != ITANIUM_CLASS_IGNORE + && GET_CODE (PATTERN (next_insn)) != USE + && GET_CODE (PATTERN (next_insn)) != CLOBBER) + { + PUT_MODE (next_insn, TImode); + break; + } } - if (n_asms < n_ready) + for (insn = get_next_important_insn (NEXT_INSN (prev_head_insn), tail); + insn != NULL_RTX; + insn = next_insn) { - /* Some normal insns to process. Skip the asms. */ - ready += n_asms; - n_ready -= n_asms; - } - else if (n_ready > 0) - { - /* Only asm insns left. */ - if (ia64_final_schedule && group_barrier_needed_p (ready[n_ready - 1])) + if (!INSN_P (insn) + || ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IGNORE + || GET_CODE (PATTERN (insn)) == USE + || GET_CODE (PATTERN (insn)) == CLOBBER) + abort (); + next_insn = get_next_important_insn (NEXT_INSN (insn), tail); + insn_num++; + index_to_bundle_states [insn_num] = NULL; + for (curr_state = index_to_bundle_states [insn_num - 1]; + curr_state != NULL; + curr_state = next_state) { - schedule_stop (sched_verbose ? dump : NULL); - sched_data.last_was_stop = 1; - maybe_rotate (sched_verbose ? dump : NULL); + pos = curr_state->accumulated_insns_num % 3; + type = ia64_safe_type (insn); + next_state = curr_state->next; + bundle_end_p + = (next_insn == NULL_RTX + || (GET_MODE (next_insn) == TImode + && INSN_CODE (insn) != CODE_FOR_insn_group_barrier)); + if (type == TYPE_F || type == TYPE_B || type == TYPE_L + || type == TYPE_S + /* We need to insert 2 Nops for cases like M_MII. 
*/ + || (type == TYPE_M && ia64_tune == PROCESSOR_ITANIUM + && !bundle_end_p && pos == 1)) + issue_nops_and_insn (curr_state, 2, insn, bundle_end_p); + issue_nops_and_insn (curr_state, 1, insn, bundle_end_p); + issue_nops_and_insn (curr_state, 0, insn, bundle_end_p); } - cycle_end_fill_slots (sched_verbose ? dump : NULL); - return 1; + if (index_to_bundle_states [insn_num] == NULL) + abort (); + for (curr_state = index_to_bundle_states [insn_num]; + curr_state != NULL; + curr_state = curr_state->next) + if (verbose >= 2 && dump) + { + struct DFA_chip + { + unsigned short one_automaton_state; + unsigned short oneb_automaton_state; + unsigned short two_automaton_state; + unsigned short twob_automaton_state; + }; + + fprintf + (dump, + "// Bundle state %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, state %d) for %d\n", + curr_state->unique_num, + (curr_state->originator == NULL + ? -1 : curr_state->originator->unique_num), + curr_state->cost, + curr_state->before_nops_num, curr_state->after_nops_num, + curr_state->accumulated_insns_num, curr_state->branch_deviation, + (ia64_tune == PROCESSOR_ITANIUM + ? ((struct DFA_chip *) curr_state->dfa_state)->oneb_automaton_state + : ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state), + INSN_UID (insn)); + } } - - if (ia64_final_schedule) - { - int nr_need_stop = 0; - - for (insnp = ready; insnp < e_ready; insnp++) - if (safe_group_barrier_needed_p (*insnp)) - nr_need_stop++; - - /* Schedule a stop bit if - - all insns require a stop bit, or - - we are starting a new cycle and _any_ insns require a stop bit. - The reason for the latter is that if our schedule is accurate, then - the additional stop won't decrease performance at this point (since - there's a split issue at this point anyway), but it gives us more - freedom when scheduling the currently ready insns. 
*/ - if ((reorder_type == 0 && nr_need_stop) - || (reorder_type == 1 && n_ready == nr_need_stop)) + if (index_to_bundle_states [insn_num] == NULL) + abort (); + /* Finding state with a minimal cost: */ + best_state = NULL; + for (curr_state = index_to_bundle_states [insn_num]; + curr_state != NULL; + curr_state = curr_state->next) + if (curr_state->accumulated_insns_num % 3 == 0 + && (best_state == NULL || best_state->cost > curr_state->cost + || (best_state->cost == curr_state->cost + && (curr_state->accumulated_insns_num + < best_state->accumulated_insns_num + || (curr_state->accumulated_insns_num + == best_state->accumulated_insns_num + && curr_state->branch_deviation + < best_state->branch_deviation))))) + best_state = curr_state; + /* Second (backward) pass: adding nops and templates: */ + insn_num = best_state->before_nops_num; + template0 = template1 = -1; + for (curr_state = best_state; + curr_state->originator != NULL; + curr_state = curr_state->originator) + { + insn = curr_state->insn; + insn_num++; + if (verbose >= 2 && dump) { - schedule_stop (sched_verbose ? dump : NULL); - sched_data.last_was_stop = 1; - maybe_rotate (sched_verbose ? dump : NULL); - if (reorder_type == 1) - return 0; + struct DFA_chip + { + unsigned short one_automaton_state; + unsigned short oneb_automaton_state; + unsigned short two_automaton_state; + unsigned short twob_automaton_state; + }; + + fprintf + (dump, + "// Best %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, state %d) for %d\n", + curr_state->unique_num, + (curr_state->originator == NULL + ? -1 : curr_state->originator->unique_num), + curr_state->cost, + curr_state->before_nops_num, curr_state->after_nops_num, + curr_state->accumulated_insns_num, curr_state->branch_deviation, + (ia64_tune == PROCESSOR_ITANIUM + ? 
((struct DFA_chip *) curr_state->dfa_state)->oneb_automaton_state + : ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state), + INSN_UID (insn)); } - else + max_pos = get_max_pos (curr_state->dfa_state); + if (max_pos == 6 || (max_pos == 3 && template0 < 0)) { - int deleted = 0; - insnp = e_ready; - /* Move down everything that needs a stop bit, preserving relative - order. */ - while (insnp-- > ready + deleted) - while (insnp >= ready + deleted) - { - rtx insn = *insnp; - if (! safe_group_barrier_needed_p (insn)) - break; - memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx)); - *ready = insn; - deleted++; - } - n_ready -= deleted; - ready += deleted; - if (deleted != nr_need_stop) + pos = max_pos; + if (max_pos == 3) + template0 = get_template (curr_state->dfa_state, 3); + else + { + template1 = get_template (curr_state->dfa_state, 3); + template0 = get_template (curr_state->dfa_state, 6); + } + } + if (max_pos > 3 && template1 < 0) + { + if (pos > 3) abort (); + template1 = get_template (curr_state->dfa_state, 3); + pos += 3; + } + for (i = 0; i < curr_state->after_nops_num; i++) + { + nop = gen_nop (); + emit_insn_after (nop, insn); + pos--; + if (pos < 0) + abort (); + if (pos % 3 == 0) + { + if (template0 < 0) + abort (); + b = gen_bundle_selector (GEN_INT (template0)); + ia64_emit_insn_before (b, nop); + template0 = template1; + template1 = -1; + } + } + if (INSN_CODE (insn) != CODE_FOR_insn_group_barrier + && GET_CODE (PATTERN (insn)) != ASM_INPUT + && asm_noperands (PATTERN (insn)) < 0) + pos--; + if (ia64_safe_type (insn) == TYPE_L) + pos--; + if (pos < 0) + abort (); + if (pos % 3 == 0 + && INSN_CODE (insn) != CODE_FOR_insn_group_barrier + && GET_CODE (PATTERN (insn)) != ASM_INPUT + && asm_noperands (PATTERN (insn)) < 0) + { + if (template0 < 0) + abort (); + b = gen_bundle_selector (GEN_INT (template0)); + ia64_emit_insn_before (b, insn); + b = PREV_INSN (insn); + insn = b; + template0 = template1; + template1 = -1; + } + for (i = 0; i 
< curr_state->before_nops_num; i++) + { + nop = gen_nop (); + ia64_emit_insn_before (nop, insn); + nop = PREV_INSN (insn); + insn = nop; + pos--; + if (pos < 0) + abort (); + if (pos % 3 == 0) + { + if (template0 < 0) + abort (); + b = gen_bundle_selector (GEN_INT (template0)); + ia64_emit_insn_before (b, insn); + b = PREV_INSN (insn); + insn = b; + template0 = template1; + template1 = -1; + } } } - - return itanium_reorder (sched_verbose ? dump : NULL, - ready, e_ready, reorder_type == 1); + if (ia64_tune == PROCESSOR_ITANIUM) + /* Insert additional cycles for MM-insns: */ + for (insn = get_next_important_insn (NEXT_INSN (prev_head_insn), tail); + insn != NULL_RTX; + insn = next_insn) + { + if (!INSN_P (insn) + || ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IGNORE + || GET_CODE (PATTERN (insn)) == USE + || GET_CODE (PATTERN (insn)) == CLOBBER) + abort (); + next_insn = get_next_important_insn (NEXT_INSN (insn), tail); + if (INSN_UID (insn) < clocks_length && add_cycles [INSN_UID (insn)]) + { + rtx last; + int i, j, n; + int pred_stop_p; + + last = prev_active_insn (insn); + pred_stop_p = recog_memoized (last) == CODE_FOR_insn_group_barrier; + if (pred_stop_p) + last = prev_active_insn (last); + n = 0; + for (;; last = prev_active_insn (last)) + if (recog_memoized (last) == CODE_FOR_bundle_selector) + { + template0 = XINT (XVECEXP (PATTERN (last), 0, 0), 0); + if (template0 == 9) + PATTERN (last) + = gen_bundle_selector (GEN_INT (2)); /* -> MFI */ + break; + } + else if (recog_memoized (last) != CODE_FOR_insn_group_barrier) + n++; + if ((pred_stop_p && n == 0) || n > 2 + || (template0 == 9 && n != 0)) + abort (); + for (j = 3 - n; j > 0; j --) + ia64_emit_insn_before (gen_nop (), insn); + add_cycles [INSN_UID (insn)]--; + if (!pred_stop_p || add_cycles [INSN_UID (insn)]) + ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), + insn); + if (pred_stop_p) + add_cycles [INSN_UID (insn)]--; + for (i = add_cycles [INSN_UID (insn)]; i > 0; i--) + { + /* Insert 
.MII bundle. */ + ia64_emit_insn_before (gen_bundle_selector (GEN_INT (0)), + insn); + ia64_emit_insn_before (gen_nop (), insn); + ia64_emit_insn_before (gen_nop (), insn); + if (i > 1) + { + ia64_emit_insn_before + (gen_insn_group_barrier (GEN_INT (3)), insn); + i--; + } + ia64_emit_insn_before (gen_nop (), insn); + ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), + insn); + } + ia64_emit_insn_before (gen_bundle_selector (GEN_INT (template0)), + insn); + for (j = n; j > 0; j --) + ia64_emit_insn_before (gen_nop (), insn); + if (pred_stop_p) + ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), + insn); + } + } + free (index_to_bundle_states); + finish_bundle_state_table (); + bundling_p = 0; + dfa_clean_insn_cache (); } -static int -ia64_sched_reorder (dump, sched_verbose, ready, pn_ready, clock_var) +/* The following function is called at the end of scheduling BB or + EBB. After reload, it inserts stop bits and does insn bundling. */ + +static void +ia64_sched_finish (dump, sched_verbose) FILE *dump; int sched_verbose; - rtx *ready; - int *pn_ready; - int clock_var; { - return ia64_internal_sched_reorder (dump, sched_verbose, ready, - pn_ready, 0, clock_var); + if (sched_verbose) + fprintf (dump, "// Finishing schedule.\n"); + if (!reload_completed) + return; + if (reload_completed) + { + final_emit_insn_group_barriers (dump); + bundling (dump, sched_verbose, current_sched_info->prev_head, + current_sched_info->next_tail); + if (sched_verbose && dump) + fprintf (dump, "// finishing %d-%d\n", + INSN_UID (NEXT_INSN (current_sched_info->prev_head)), + INSN_UID (PREV_INSN (current_sched_info->next_tail))); + + return; + } } -/* Like ia64_sched_reorder, but called after issuing each insn. - Override the default sort algorithm to better slot instructions. */ +/* The following function inserts stop bits in scheduled BB or EBB. 
*/ -static int -ia64_sched_reorder2 (dump, sched_verbose, ready, pn_ready, clock_var) +static void +final_emit_insn_group_barriers (dump) FILE *dump ATTRIBUTE_UNUSED; - int sched_verbose ATTRIBUTE_UNUSED; - rtx *ready; - int *pn_ready; - int clock_var; { - if (sched_data.last_was_stop) - return 0; + rtx insn; + int need_barrier_p = 0; + rtx prev_insn = NULL_RTX; - /* Detect one special case and try to optimize it. - If we have 1.M;;MI 2.MIx, and slots 2.1 (M) and 2.2 (I) are both NOPs, - then we can get better code by transforming this to 1.MFB;; 2.MIx. */ - if (sched_data.first_slot == 1 - && sched_data.stopbit[0] - && ((sched_data.cur == 4 - && (sched_data.types[1] == TYPE_M || sched_data.types[1] == TYPE_A) - && (sched_data.types[2] == TYPE_I || sched_data.types[2] == TYPE_A) - && (sched_data.types[3] != TYPE_M && sched_data.types[3] != TYPE_A)) - || (sched_data.cur == 3 - && (sched_data.types[1] == TYPE_M - || sched_data.types[1] == TYPE_A) - && (sched_data.types[2] != TYPE_M - && sched_data.types[2] != TYPE_I - && sched_data.types[2] != TYPE_A)))) - - { - int i, best; - rtx stop = sched_data.insns[1]; + init_insn_group_barriers (); - /* Search backward for the stop bit that must be there. */ - while (1) + for (insn = NEXT_INSN (current_sched_info->prev_head); + insn != current_sched_info->next_tail; + insn = NEXT_INSN (insn)) + { + if (GET_CODE (insn) == BARRIER) { - int insn_code; - - stop = PREV_INSN (stop); - if (GET_CODE (stop) != INSN) - abort (); - insn_code = recog_memoized (stop); - - /* Ignore .pred.rel.mutex. + rtx last = prev_active_insn (insn); - ??? Update this to ignore cycle display notes too - ??? once those are implemented */ - if (insn_code == CODE_FOR_pred_rel_mutex - || insn_code == CODE_FOR_prologue_use) + if (! 
last) continue; + if (GET_CODE (last) == JUMP_INSN + && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC) + last = prev_active_insn (last); + if (recog_memoized (last) != CODE_FOR_insn_group_barrier) + emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last); - if (insn_code == CODE_FOR_insn_group_barrier) - break; - abort (); + init_insn_group_barriers (); + need_barrier_p = 0; + prev_insn = NULL_RTX; } - - /* Adjust the stop bit's slot selector. */ - if (INTVAL (XVECEXP (PATTERN (stop), 0, 0)) != 1) - abort (); - XVECEXP (PATTERN (stop), 0, 0) = GEN_INT (3); - - sched_data.stopbit[0] = 0; - sched_data.stopbit[2] = 1; - - sched_data.types[5] = sched_data.types[3]; - sched_data.types[4] = sched_data.types[2]; - sched_data.types[3] = sched_data.types[1]; - sched_data.insns[5] = sched_data.insns[3]; - sched_data.insns[4] = sched_data.insns[2]; - sched_data.insns[3] = sched_data.insns[1]; - sched_data.stopbit[5] = sched_data.stopbit[4] = sched_data.stopbit[3] = 0; - sched_data.cur += 2; - sched_data.first_slot = 3; - for (i = 0; i < NR_PACKETS; i++) + else if (INSN_P (insn)) { - const struct ia64_packet *p = packets + i; - if (p->t[0] == TYPE_M && p->t[1] == TYPE_F && p->t[2] == TYPE_B) + if (recog_memoized (insn) == CODE_FOR_insn_group_barrier) { - sched_data.packet = p; - break; + init_insn_group_barriers (); + need_barrier_p = 0; + prev_insn = NULL_RTX; } - } - rotate_one_bundle (sched_verbose ? dump : NULL); - - best = 6; - for (i = 0; i < NR_PACKETS; i++) - { - const struct ia64_packet *p = packets + i; - int split = get_split (p, sched_data.first_slot); - int next; - - /* Disallow multiway branches here. 
*/ - if (p->t[1] == TYPE_B) - continue; - - if (packet_matches_p (p, split, &next) && next < best) + else if (need_barrier_p || group_barrier_needed_p (insn)) { - best = next; - sched_data.packet = p; - sched_data.split = split; + if (TARGET_EARLY_STOP_BITS) + { + rtx last; + + for (last = insn; + last != current_sched_info->prev_head; + last = PREV_INSN (last)) + if (INSN_P (last) && GET_MODE (last) == TImode + && stops_p [INSN_UID (last)]) + break; + if (last == current_sched_info->prev_head) + last = insn; + last = prev_active_insn (last); + if (last + && recog_memoized (last) != CODE_FOR_insn_group_barrier) + emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), + last); + init_insn_group_barriers (); + for (last = NEXT_INSN (last); + last != insn; + last = NEXT_INSN (last)) + if (INSN_P (last)) + group_barrier_needed_p (last); + } + else + { + emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), + insn); + init_insn_group_barriers (); + } + group_barrier_needed_p (insn); + prev_insn = NULL_RTX; } + else if (recog_memoized (insn) >= 0) + prev_insn = insn; + need_barrier_p = (GET_CODE (insn) == CALL_INSN + || GET_CODE (PATTERN (insn)) == ASM_INPUT + || asm_noperands (PATTERN (insn)) >= 0); } - if (best == 6) - abort (); } +} - if (*pn_ready > 0) - { - int more = ia64_internal_sched_reorder (dump, sched_verbose, - ready, pn_ready, 1, - clock_var); - if (more) - return more; - /* Did we schedule a stop? If so, finish this cycle. */ - if (sched_data.cur == sched_data.first_slot) - return 0; - } + - if (sched_verbose) - fprintf (dump, "// Can't issue more this cycle; updating type array.\n"); +/* If the following function returns TRUE, we will use the the DFA + insn scheduler. */ - cycle_end_fill_slots (sched_verbose ? dump : NULL); - if (sched_verbose) - dump_current_packet (dump); - return 0; +static int +ia64_use_dfa_pipeline_interface () +{ + return 1; } -/* We are about to issue INSN. 
Return the number of insns left on the - ready queue that can be issued this cycle. */ +/* If the following function returns TRUE, we will use the the DFA + insn scheduler. */ static int -ia64_variable_issue (dump, sched_verbose, insn, can_issue_more) - FILE *dump; - int sched_verbose; - rtx insn; - int can_issue_more ATTRIBUTE_UNUSED; +ia64_first_cycle_multipass_dfa_lookahead () { - enum attr_type t = ia64_safe_type (insn); + return (reload_completed ? 6 : 4); +} - if (sched_data.last_was_stop) - { - int t = sched_data.first_slot; - if (t == 0) - t = 3; - ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (t)), insn); - init_insn_group_barriers (); - sched_data.last_was_stop = 0; - } +/* The following function initiates variable `dfa_pre_cycle_insn'. */ - if (t == TYPE_UNKNOWN) +static void +ia64_init_dfa_pre_cycle_insn () +{ + if (temp_dfa_state == NULL) { - if (sched_verbose) - fprintf (dump, "// Ignoring type %s\n", type_names[t]); - if (GET_CODE (PATTERN (insn)) == ASM_INPUT - || asm_noperands (PATTERN (insn)) >= 0) - { - /* This must be some kind of asm. Clear the scheduling state. */ - rotate_two_bundles (sched_verbose ? dump : NULL); - if (ia64_final_schedule) - group_barrier_needed_p (insn); - } - return 1; + dfa_state_size = state_size (); + temp_dfa_state = xmalloc (dfa_state_size); + prev_cycle_state = xmalloc (dfa_state_size); } + dfa_pre_cycle_insn = make_insn_raw (gen_pre_cycle ()); + PREV_INSN (dfa_pre_cycle_insn) = NEXT_INSN (dfa_pre_cycle_insn) = NULL_RTX; + recog_memoized (dfa_pre_cycle_insn); + dfa_stop_insn = make_insn_raw (gen_insn_group_barrier (GEN_INT (3))); + PREV_INSN (dfa_stop_insn) = NEXT_INSN (dfa_stop_insn) = NULL_RTX; + recog_memoized (dfa_stop_insn); +} - /* This is _not_ just a sanity check. group_barrier_needed_p will update - important state info. Don't delete this test. 
*/ - if (ia64_final_schedule - && group_barrier_needed_p (insn)) - abort (); +/* The following function returns the pseudo insn DFA_PRE_CYCLE_INSN + used by the DFA insn scheduler. */ - sched_data.stopbit[sched_data.cur] = 0; - sched_data.insns[sched_data.cur] = insn; - sched_data.types[sched_data.cur] = t; +static rtx +ia64_dfa_pre_cycle_insn () +{ + return dfa_pre_cycle_insn; +} - sched_data.cur++; - if (sched_verbose) - fprintf (dump, "// Scheduling insn %d of type %s\n", - INSN_UID (insn), type_names[t]); +/* The following function returns TRUE if PRODUCER (of type ilog or + ld) produces address for CONSUMER (of type st or stf). */ - if (GET_CODE (insn) == CALL_INSN && ia64_final_schedule) - { - schedule_stop (sched_verbose ? dump : NULL); - sched_data.last_was_stop = 1; - } +int +ia64_st_address_bypass_p (producer, consumer) + rtx producer; + rtx consumer; +{ + rtx dest, reg, mem; - return 1; + if (producer == NULL_RTX || consumer == NULL_RTX) + abort (); + dest = ia64_single_set (producer); + if (dest == NULL_RTX || (reg = SET_DEST (dest)) == NULL_RTX + || (GET_CODE (reg) != REG && GET_CODE (reg) != SUBREG)) + abort (); + if (GET_CODE (reg) == SUBREG) + reg = SUBREG_REG (reg); + dest = ia64_single_set (consumer); + if (dest == NULL_RTX || (mem = SET_DEST (dest)) == NULL_RTX + || GET_CODE (mem) != MEM) + abort (); + return reg_mentioned_p (reg, mem); } -/* Free data allocated by ia64_sched_init. */ +/* The following function returns TRUE if PRODUCER (of type ilog or + ld) produces address for CONSUMER (of type ld or fld). 
*/ -static void -ia64_sched_finish (dump, sched_verbose) - FILE *dump; - int sched_verbose; +int +ia64_ld_address_bypass_p (producer, consumer) + rtx producer; + rtx consumer; { - if (sched_verbose) - fprintf (dump, "// Finishing schedule.\n"); - rotate_two_bundles (NULL); - free (sched_types); - free (sched_ready); + rtx dest, src, reg, mem; + + if (producer == NULL_RTX || consumer == NULL_RTX) + abort (); + dest = ia64_single_set (producer); + if (dest == NULL_RTX || (reg = SET_DEST (dest)) == NULL_RTX + || (GET_CODE (reg) != REG && GET_CODE (reg) != SUBREG)) + abort (); + if (GET_CODE (reg) == SUBREG) + reg = SUBREG_REG (reg); + src = ia64_single_set (consumer); + if (src == NULL_RTX || (mem = SET_SRC (src)) == NULL_RTX) + abort (); + if (GET_CODE (mem) == UNSPEC && XVECLEN (mem, 0) > 0) + mem = XVECEXP (mem, 0, 0); + while (GET_CODE (mem) == SUBREG || GET_CODE (mem) == ZERO_EXTEND) + mem = XEXP (mem, 0); + if (GET_CODE (mem) != MEM) + abort (); + return reg_mentioned_p (reg, mem); +} + +/* The following function returns TRUE if INSN produces address for a + load/store insn. We will place such insns into M slot because it + decreases its latency time. */ + +int +ia64_produce_address_p (insn) + rtx insn; +{ + return insn->call; } + /* Emit pseudo-ops for the assembler to describe predicate relations. At present this assumes that we only consider predicate pairs to @@ -6887,111 +7006,6 @@ emit_predicate_relation_info () } } -/* Generate a NOP instruction of type T. We will never generate L type - nops. */ - -static rtx -gen_nop_type (t) - enum attr_type t; -{ - switch (t) - { - case TYPE_M: - return gen_nop_m (); - case TYPE_I: - return gen_nop_i (); - case TYPE_B: - return gen_nop_b (); - case TYPE_F: - return gen_nop_f (); - case TYPE_X: - return gen_nop_x (); - default: - abort (); - } -} - -/* After the last scheduling pass, fill in NOPs. It's easier to do this - here than while scheduling. 
*/ - -static void -ia64_emit_nops () -{ - rtx insn; - const struct bundle *b = 0; - int bundle_pos = 0; - - for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) - { - rtx pat; - enum attr_type t; - pat = INSN_P (insn) ? PATTERN (insn) : const0_rtx; - if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER) - continue; - if ((GET_CODE (pat) == UNSPEC && XINT (pat, 1) == UNSPEC_BUNDLE_SELECTOR) - || GET_CODE (insn) == CODE_LABEL) - { - if (b) - while (bundle_pos < 3) - { - emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn); - bundle_pos++; - } - if (GET_CODE (insn) != CODE_LABEL) - b = bundle + INTVAL (XVECEXP (pat, 0, 0)); - else - b = 0; - bundle_pos = 0; - continue; - } - else if (GET_CODE (pat) == UNSPEC_VOLATILE - && XINT (pat, 1) == UNSPECV_INSN_GROUP_BARRIER) - { - int t = INTVAL (XVECEXP (pat, 0, 0)); - if (b) - while (bundle_pos < t) - { - emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn); - bundle_pos++; - } - continue; - } - - if (bundle_pos == 3) - b = 0; - - if (b && INSN_P (insn)) - { - t = ia64_safe_type (insn); - if (asm_noperands (PATTERN (insn)) >= 0 - || GET_CODE (PATTERN (insn)) == ASM_INPUT) - { - while (bundle_pos < 3) - { - emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn); - bundle_pos++; - } - continue; - } - - if (t == TYPE_UNKNOWN) - continue; - while (bundle_pos < 3) - { - if (t == b->t[bundle_pos] - || (t == TYPE_A && (b->t[bundle_pos] == TYPE_M - || b->t[bundle_pos] == TYPE_I))) - break; - - emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn); - bundle_pos++; - } - if (bundle_pos < 3) - bundle_pos++; - } - } -} - /* Perform machine dependent operations on the rtl chain INSNS. 
*/ void @@ -7014,14 +7028,91 @@ ia64_reorg (insns) { timevar_push (TV_SCHED2); ia64_final_schedule = 1; + + initiate_bundle_states (); + ia64_nop = make_insn_raw (gen_nop ()); + PREV_INSN (ia64_nop) = NEXT_INSN (ia64_nop) = NULL_RTX; + recog_memoized (ia64_nop); + clocks_length = get_max_uid () + 1; + stops_p = (char *) xmalloc (clocks_length); + memset (stops_p, 0, clocks_length); + if (ia64_tune == PROCESSOR_ITANIUM) + { + clocks = (int *) xmalloc (clocks_length * sizeof (int)); + memset (clocks, 0, clocks_length * sizeof (int)); + add_cycles = (int *) xmalloc (clocks_length * sizeof (int)); + memset (add_cycles, 0, clocks_length * sizeof (int)); + } + if (ia64_tune == PROCESSOR_ITANIUM2) + { + pos_1 = get_cpu_unit_code ("2_1"); + pos_2 = get_cpu_unit_code ("2_2"); + pos_3 = get_cpu_unit_code ("2_3"); + pos_4 = get_cpu_unit_code ("2_4"); + pos_5 = get_cpu_unit_code ("2_5"); + pos_6 = get_cpu_unit_code ("2_6"); + _0mii_ = get_cpu_unit_code ("2b_0mii."); + _0mmi_ = get_cpu_unit_code ("2b_0mmi."); + _0mfi_ = get_cpu_unit_code ("2b_0mfi."); + _0mmf_ = get_cpu_unit_code ("2b_0mmf."); + _0bbb_ = get_cpu_unit_code ("2b_0bbb."); + _0mbb_ = get_cpu_unit_code ("2b_0mbb."); + _0mib_ = get_cpu_unit_code ("2b_0mib."); + _0mmb_ = get_cpu_unit_code ("2b_0mmb."); + _0mfb_ = get_cpu_unit_code ("2b_0mfb."); + _0mlx_ = get_cpu_unit_code ("2b_0mlx."); + _1mii_ = get_cpu_unit_code ("2b_1mii."); + _1mmi_ = get_cpu_unit_code ("2b_1mmi."); + _1mfi_ = get_cpu_unit_code ("2b_1mfi."); + _1mmf_ = get_cpu_unit_code ("2b_1mmf."); + _1bbb_ = get_cpu_unit_code ("2b_1bbb."); + _1mbb_ = get_cpu_unit_code ("2b_1mbb."); + _1mib_ = get_cpu_unit_code ("2b_1mib."); + _1mmb_ = get_cpu_unit_code ("2b_1mmb."); + _1mfb_ = get_cpu_unit_code ("2b_1mfb."); + _1mlx_ = get_cpu_unit_code ("2b_1mlx."); + } + else + { + pos_1 = get_cpu_unit_code ("1_1"); + pos_2 = get_cpu_unit_code ("1_2"); + pos_3 = get_cpu_unit_code ("1_3"); + pos_4 = get_cpu_unit_code ("1_4"); + pos_5 = get_cpu_unit_code ("1_5"); + pos_6 = 
get_cpu_unit_code ("1_6"); + _0mii_ = get_cpu_unit_code ("1b_0mii."); + _0mmi_ = get_cpu_unit_code ("1b_0mmi."); + _0mfi_ = get_cpu_unit_code ("1b_0mfi."); + _0mmf_ = get_cpu_unit_code ("1b_0mmf."); + _0bbb_ = get_cpu_unit_code ("1b_0bbb."); + _0mbb_ = get_cpu_unit_code ("1b_0mbb."); + _0mib_ = get_cpu_unit_code ("1b_0mib."); + _0mmb_ = get_cpu_unit_code ("1b_0mmb."); + _0mfb_ = get_cpu_unit_code ("1b_0mfb."); + _0mlx_ = get_cpu_unit_code ("1b_0mlx."); + _1mii_ = get_cpu_unit_code ("1b_1mii."); + _1mmi_ = get_cpu_unit_code ("1b_1mmi."); + _1mfi_ = get_cpu_unit_code ("1b_1mfi."); + _1mmf_ = get_cpu_unit_code ("1b_1mmf."); + _1bbb_ = get_cpu_unit_code ("1b_1bbb."); + _1mbb_ = get_cpu_unit_code ("1b_1mbb."); + _1mib_ = get_cpu_unit_code ("1b_1mib."); + _1mmb_ = get_cpu_unit_code ("1b_1mmb."); + _1mfb_ = get_cpu_unit_code ("1b_1mfb."); + _1mlx_ = get_cpu_unit_code ("1b_1mlx."); + } schedule_ebbs (rtl_dump_file); + finish_bundle_states (); + if (ia64_tune == PROCESSOR_ITANIUM) + { + free (add_cycles); + free (clocks); + } + free (stops_p); + emit_insn_group_barriers (rtl_dump_file, insns); + ia64_final_schedule = 0; timevar_pop (TV_SCHED2); - - /* This relies on the NOTE_INSN_BASIC_BLOCK notes to be in the same - place as they were during scheduling. */ - emit_insn_group_barriers (rtl_dump_file, insns); - ia64_emit_nops (); } else emit_all_insn_group_barriers (rtl_dump_file, insns); diff --git a/gcc/config/ia64/ia64.h b/gcc/config/ia64/ia64.h index 091510c387c..970827bbf4d 100644 --- a/gcc/config/ia64/ia64.h +++ b/gcc/config/ia64/ia64.h @@ -95,6 +95,8 @@ extern int target_flags; #define MASK_DWARF2_ASM 0x40000000 /* test dwarf2 line info via gas. */ +#define MASK_EARLY_STOP_BITS 0x00002000 /* tune stop bits for the model. 
*/ + #define TARGET_BIG_ENDIAN (target_flags & MASK_BIG_ENDIAN) #define TARGET_GNU_AS (target_flags & MASK_GNU_AS) @@ -137,6 +139,7 @@ extern int ia64_tls_size; #define TARGET_TLS14 (ia64_tls_size == 14) #define TARGET_TLS22 (ia64_tls_size == 22) #define TARGET_TLS64 (ia64_tls_size == 64) +#define TARGET_EARLY_STOP_BITS (target_flags & MASK_EARLY_STOP_BITS) #define TARGET_HPUX_LD 0 @@ -188,6 +191,10 @@ extern int ia64_tls_size; N_("Enable Dwarf 2 line debug info via GNU as")}, \ { "no-dwarf2-asm", -MASK_DWARF2_ASM, \ N_("Disable Dwarf 2 line debug info via GNU as")}, \ + { "early-stop-bits", MASK_EARLY_STOP_BITS, \ + N_("Enable earlier placing stop bits for better scheduling")}, \ + { "no-early-stop-bits", -MASK_EARLY_STOP_BITS, \ + N_("Disable earlier placing stop bits")}, \ SUBTARGET_SWITCHES \ { "", TARGET_DEFAULT | TARGET_CPU_DEFAULT, \ NULL } \ @@ -213,12 +220,30 @@ extern int ia64_tls_size; extern const char *ia64_fixed_range_string; extern const char *ia64_tls_size_string; + +/* Which processor to schedule for. The cpu attribute defines a list + that mirrors this list, so changes to i64.md must be made at the + same time. */ + +enum processor_type +{ + PROCESSOR_ITANIUM, /* Original Itanium. */ + PROCESSOR_ITANIUM2, + PROCESSOR_max +}; + +extern enum processor_type ia64_tune; + +extern const char *ia64_tune_string; + #define TARGET_OPTIONS \ { \ { "fixed-range=", &ia64_fixed_range_string, \ N_("Specify range of registers to make fixed")}, \ { "tls-size=", &ia64_tls_size_string, \ N_("Specify bit size of immediate TLS offsets")}, \ + { "tune=", &ia64_tune_string, \ + N_("Schedule code for given CPU")}, \ } /* Sometimes certain combinations of command options do not make sense on a @@ -2485,4 +2510,9 @@ enum fetchop_code { #undef PROFILE_BEFORE_PROLOGUE #define PROFILE_BEFORE_PROLOGUE 1 + + +/* Switch on code for querying unit reservations. 
*/ +#define CPU_UNITS_QUERY 1 + /* End of ia64.h */ diff --git a/gcc/config/ia64/ia64.md b/gcc/config/ia64/ia64.md index c2275494c25..a96e92fc900 100644 --- a/gcc/config/ia64/ia64.md +++ b/gcc/config/ia64/ia64.md @@ -91,6 +91,10 @@ ;; :: ;; :::::::::::::::::::: +;; Processor type. This attribute must exactly match the processor_type +;; enumeration in ia64.h. +(define_attr "cpu" "itanium,itanium2" (const (symbol_ref "ia64_tune"))) + ;; Instruction type. This primarily determines how instructions can be ;; packed in bundles, and secondarily affects scheduling to function units. @@ -110,8 +114,8 @@ (define_attr "itanium_class" "unknown,ignore,stop_bit,br,fcmp,fcvtfx,fld, fmac,fmisc,frar_i,frar_m,frbr,frfr,frpr,ialu,icmp,ilog,ishf,ld, chk_s,long_i,mmmul,mmshf,mmshfi,rse_m,scall,sem,stf,st,syst_m0, - syst_m,tbit,toar_i,toar_m,tobr,tofr,topr,xmpy,xtd,nop_b,nop_f, - nop_i,nop_m,nop_x,lfetch" + syst_m,tbit,toar_i,toar_m,tobr,tofr,topr,xmpy,xtd,nop,nop_b,nop_f, + nop_i,nop_m,nop_x,lfetch,pre_cycle" (const_string "unknown")) ;; chk_s has an I and an M form; use type A for convenience. @@ -146,76 +150,23 @@ (define_attr "predicable" "no,yes" (const_string "yes")) -;; :::::::::::::::::::: -;; :: -;; :: Function Units -;; :: -;; :::::::::::::::::::: -;; We define 6 "dummy" functional units. All the real work to decide which -;; insn uses which unit is done by our MD_SCHED_REORDER hooks. We only -;; have to ensure here that there are enough copies of the dummy unit so -;; that the scheduler doesn't get confused by MD_SCHED_REORDER. -;; Other than the 6 dummies for normal insns, we also add a single dummy unit -;; for stop bits. 
- -(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "br") 0 0) -(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "scall") 0 0) -(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "fcmp") 2 0) -(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "fcvtfx") 7 0) -(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "fld") 9 0) -(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "fmac") 5 0) -(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "fmisc") 5 0) - -;; There is only one insn `mov = ar.bsp' for frar_i: -(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "frar_i") 13 0) -;; There is only ony insn `mov = ar.unat' for frar_m: -(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "frar_m") 6 0) -(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "frbr") 2 0) -(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "frfr") 2 0) -(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "frpr") 2 0) - -(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "ialu") 1 0) -(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "icmp") 1 0) -(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "ilog") 1 0) -(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "ishf") 1 0) -(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "ld") 2 0) -(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "long_i") 1 0) -(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "mmmul") 2 0) -(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "mmshf") 2 0) -(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "mmshfi") 2 0) - -;; Now we have only one insn (flushrs) of such class. We assume that flushrs -;; is the 1st syllable of the bundle after stop bit. 
-(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "rse_m") 0 0) -(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "sem") 11 0) -(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "stf") 1 0) -(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "st") 1 0) -(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "syst_m0") 1 0) -;; Now we use only one insn `mf'. Therfore latency time is set up to 0. -(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "syst_m") 0 0) -(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "tbit") 1 0) - -;; There is only one insn `mov ar.pfs =' for toar_i therefore we use -;; latency time equal to 0: -(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "toar_i") 0 0) -;; There are only ony 2 insns `mov ar.ccv =' and `mov ar.unat =' for toar_m: -(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "toar_m") 5 0) -(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "tobr") 1 0) -(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "tofr") 9 0) -(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "topr") 1 0) -(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "xmpy") 7 0) -(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "xtd") 1 0) - -(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "nop_m") 0 0) -(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "nop_i") 0 0) -(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "nop_f") 0 0) -(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "nop_b") 0 0) -(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "nop_x") 0 0) - -(define_function_unit "stop_bit" 1 1 (eq_attr "itanium_class" "stop_bit") 0 0) -(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "ignore") 0 0) -(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "unknown") 0 0) +;; DFA descriptions of ia64 processors used for insn scheduling and +;; bundling. 
+ +(automata_option "ndfa") + +;; Uncomment the following line to output automata for debugging. +;; (automata_option "v") + +(automata_option "w") + +;;(automata_option "no-minimization") + + +(include "itanium1.md") +(include "itanium2.md") + ;; :::::::::::::::::::: ;; :: @@ -5089,7 +5040,7 @@ [(const_int 0)] "" "nop 0" - [(set_attr "itanium_class" "unknown")]) + [(set_attr "itanium_class" "nop")]) (define_insn "nop_m" [(const_int 1)] @@ -5121,6 +5072,14 @@ "" [(set_attr "itanium_class" "nop_x")]) +;; The following insn will be never generated. It is used only by +;; insn scheduler to change state before advancing cycle. +(define_insn "pre_cycle" + [(const_int 6)] + "" + "" + [(set_attr "itanium_class" "pre_cycle")]) + (define_insn "bundle_selector" [(unspec [(match_operand 0 "const_int_operand" "")] UNSPEC_BUNDLE_SELECTOR)] "" diff --git a/gcc/config/ia64/itanium1.md b/gcc/config/ia64/itanium1.md new file mode 100644 index 00000000000..2728ed3de25 --- /dev/null +++ b/gcc/config/ia64/itanium1.md @@ -0,0 +1,1616 @@ +;; Itanium1 (original Itanium) DFA descriptions for insn scheduling +;; and bundling. +;; Copyright (C) 2002 Free Software Foundation, Inc. +;; Contributed by Vladimir Makarov <vmakarov@redhat.com>. +;; +;; This file is part of GNU CC. +;; +;; GNU CC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 2, or (at your option) +;; any later version. +;; +;; GNU CC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GNU CC; see the file COPYING. If not, write to +;; the Free Software Foundation, 59 Temple Place - Suite 330, +;; Boston, MA 02111-1307, USA. 
*/ +;; + + +/* This is a description of pipeline hazards based on DFA. The + following constructions can be used for this: + + o (define_cpu_unit string [string]) describes cpu functional units + (separated by commas). + + 1st operand: Names of cpu function units. + 2nd operand: Name of automaton (see comments for + DEFINE_AUTOMATON). + + All define_reservations and define_cpu_units should have unique + names which can not be "nothing". + + o (exclusion_set string string) means that each CPU function unit + in the first string can not be reserved simultaneously with each + unit whose name is in the second string and vice versa. CPU + units in the string are separated by commas. For example, it is + useful for describing a CPU with a fully pipelined floating point + functional unit which can execute simultaneously only single + floating point insns or only double floating point insns. + + o (presence_set string string) means that each CPU function unit in + the first string can not be reserved unless at least one of + the patterns of units whose names are in the second string is + reserved. This is an asymmetric relation. CPU units or unit + patterns in the strings are separated by commas. Pattern is one + unit name or unit names separated by white-spaces. + + For example, it is useful for describing that slot1 is reserved + after slot0 reservation for a VLIW processor. We could describe + it by the following construction + + (presence_set "slot1" "slot0") + + Or slot1 is reserved only after slot0 and unit b0 reservation. + In this case we could write + + (presence_set "slot1" "slot0 b0") + + All CPU functional units in a set should belong to the same + automaton. + + o (final_presence_set string string) is analogous to + `presence_set'. The difference between them is when checking is + done. When an instruction is issued in given automaton state + reflecting all current and planned unit reservations, the + automaton state is changed.
The first state is a source state, + the second one is a result state. Checking for `presence_set' is + done on the source state reservation, checking for + `final_presence_set' is done on the result reservation. This + construction is useful to describe a reservation which is + actually two subsequent reservations. For example, if we use + + (presence_set "slot1" "slot0") + + the following insn will never be issued (because slot1 requires + slot0 which is absent in the source state). + + (define_reservation "insn_and_nop" "slot0 + slot1") + + but it can be issued if we use analogous `final_presence_set'. + + o (absence_set string string) means that each CPU function unit in + the first string can be reserved only if each pattern of units + whose names are in the second string is not reserved. This is an + asymmetric relation (actually exclusion set is analogous to this + one but it is symmetric). CPU units or unit patterns in the + string are separated by commas. Pattern is one unit name or unit + names separated by white-spaces. + + For example, it is useful for describing that slot0 can not be + reserved after slot1 or slot2 reservation for a VLIW processor. + We could describe it by the following construction + + (absence_set "slot0" "slot1, slot2") + + Or slot2 can not be reserved if slot0 and unit b0 are reserved or + slot1 and unit b1 are reserved. In this case we could write + + (absence_set "slot2" "slot0 b0, slot1 b1") + + All CPU functional units in a set should belong to the same + automaton. + + o (final_absence_set string string) is analogous to `absence_set' but + checking is done on the result (state) reservation. See comments + for final_presence_set. + + o (define_bypass number out_insn_names in_insn_names) names bypass with + given latency (the first number) from insns given by the first + string (see define_insn_reservation) into insns given by the + second string. Insn names in the strings are separated by + commas.
+ + o (define_automaton string) describes names of automata + generated and used for pipeline hazards recognition. The names + are separated by commas. Actually it is possible to generate the + single automaton but unfortunately it can be very large. If we + use more than one automaton, the total size of the automata usually + is less than the single one. The automaton name is used in + define_cpu_unit. All automata should have unique names. + + o (automata_option string) describes an option for generation of + automata. Currently there are the following options: + + o "no-minimization" which makes no minimization of automata. + This is only worth doing when we are debugging the description + and need to look more accurately at reservations of states. + + o "ndfa" which makes automata with nondeterministic reservation + by insns. + + o (define_reservation string string) names reservation (the first + string) of cpu functional units (the 2nd string). Sometimes unit + reservations for different insns contain common parts. In such + case, you describe the common part and use its name (the 1st + parameter) in regular expression in define_insn_reservation. All + define_reservations, define results and define_cpu_units should + have unique names which can not be "nothing". + + o (define_insn_reservation name default_latency condition regexpr) + describes reservation of cpu functional units (the 4th operand) + for instruction which is selected by the condition (the 3rd + parameter). The first parameter is used for output of debugging + information. The reservations are described by a regular + expression according to the following syntax: + + regexp = regexp "," oneof + | oneof + + oneof = oneof "|" allof + | allof + + allof = allof "+" repeat + | repeat + + repeat = element "*" number + | element + + element = cpu_function_name + | reservation_name + | result_name + | "nothing" + | "(" regexp ")" + + 1. "," is used for describing start of the next cycle in + reservation. + + 2.
"|" is used for describing the reservation described by the + first regular expression *or* the reservation described by + the second regular expression *or* etc. + + 3. "+" is used for describing the reservation described by the + first regular expression *and* the reservation described by + the second regular expression *and* etc. + + 4. "*" is used for convenience and simply means sequence in + which the regular expression is repeated NUMBER times with + cycle advancing (see ","). + + 5. cpu function unit name which means reservation. + + 6. reservation name -- see define_reservation. + + 7. string "nothing" means no units reservation. + +*/ + +(define_automaton "one") + +;; All possible combinations of bundles/syllables +(define_cpu_unit "1_0m.ii, 1_0m.mi, 1_0m.fi, 1_0m.mf, 1_0b.bb, 1_0m.bb,\ + 1_0m.ib, 1_0m.mb, 1_0m.fb, 1_0m.lx" "one") +(define_cpu_unit "1_0mi.i, 1_0mm.i, 1_0mf.i, 1_0mm.f, 1_0bb.b, 1_0mb.b,\ + 1_0mi.b, 1_0mm.b, 1_0mf.b, 1_0mlx." "one") +(define_cpu_unit "1_0mii., 1_0mmi., 1_0mfi., 1_0mmf., 1_0bbb., 1_0mbb.,\ + 1_0mib., 1_0mmb., 1_0mfb." "one") + +(define_cpu_unit "1_1m.ii, 1_1m.mi, 1_1m.fi, 1_1b.bb, 1_1m.bb,\ + 1_1m.ib, 1_1m.mb, 1_1m.fb, 1_1m.lx" "one") +(define_cpu_unit "1_1mi.i, 1_1mm.i, 1_1mf.i, 1_1bb.b, 1_1mb.b,\ + 1_1mi.b, 1_1mm.b, 1_1mf.b, 1_1mlx." "one") +(define_cpu_unit "1_1mii., 1_1mmi., 1_1mfi., 1_1bbb., 1_1mbb.,\ + 1_1mib., 1_1mmb., 1_1mfb." 
"one") + +;; Slot 1 +(exclusion_set "1_0m.ii" + "1_0m.mi, 1_0m.fi, 1_0m.mf, 1_0b.bb, 1_0m.bb, 1_0m.ib, 1_0m.mb, 1_0m.fb,\ + 1_0m.lx") +(exclusion_set "1_0m.mi" + "1_0m.fi, 1_0m.mf, 1_0b.bb, 1_0m.bb, 1_0m.ib, 1_0m.mb, 1_0m.fb, 1_0m.lx") +(exclusion_set "1_0m.fi" + "1_0m.mf, 1_0b.bb, 1_0m.bb, 1_0m.ib, 1_0m.mb, 1_0m.fb, 1_0m.lx") +(exclusion_set "1_0m.mf" + "1_0b.bb, 1_0m.bb, 1_0m.ib, 1_0m.mb, 1_0m.fb, 1_0m.lx") +(exclusion_set "1_0b.bb" "1_0m.bb, 1_0m.ib, 1_0m.mb, 1_0m.fb, 1_0m.lx") +(exclusion_set "1_0m.bb" "1_0m.ib, 1_0m.mb, 1_0m.fb, 1_0m.lx") +(exclusion_set "1_0m.ib" "1_0m.mb, 1_0m.fb, 1_0m.lx") +(exclusion_set "1_0m.mb" "1_0m.fb, 1_0m.lx") +(exclusion_set "1_0m.fb" "1_0m.lx") + +;; Slot 2 +(exclusion_set "1_0mi.i" + "1_0mm.i, 1_0mf.i, 1_0mm.f, 1_0bb.b, 1_0mb.b, 1_0mi.b, 1_0mm.b, 1_0mf.b,\ + 1_0mlx.") +(exclusion_set "1_0mm.i" + "1_0mf.i, 1_0mm.f, 1_0bb.b, 1_0mb.b, 1_0mi.b, 1_0mm.b, 1_0mf.b, 1_0mlx.") +(exclusion_set "1_0mf.i" + "1_0mm.f, 1_0bb.b, 1_0mb.b, 1_0mi.b, 1_0mm.b, 1_0mf.b, 1_0mlx.") +(exclusion_set "1_0mm.f" + "1_0bb.b, 1_0mb.b, 1_0mi.b, 1_0mm.b, 1_0mf.b, 1_0mlx.") +(exclusion_set "1_0bb.b" "1_0mb.b, 1_0mi.b, 1_0mm.b, 1_0mf.b, 1_0mlx.") +(exclusion_set "1_0mb.b" "1_0mi.b, 1_0mm.b, 1_0mf.b, 1_0mlx.") +(exclusion_set "1_0mi.b" "1_0mm.b, 1_0mf.b, 1_0mlx.") +(exclusion_set "1_0mm.b" "1_0mf.b, 1_0mlx.") +(exclusion_set "1_0mf.b" "1_0mlx.") + +;; Slot 3 +(exclusion_set "1_0mii." + "1_0mmi., 1_0mfi., 1_0mmf., 1_0bbb., 1_0mbb., 1_0mib., 1_0mmb., 1_0mfb.,\ + 1_0mlx.") +(exclusion_set "1_0mmi." + "1_0mfi., 1_0mmf., 1_0bbb., 1_0mbb., 1_0mib., 1_0mmb., 1_0mfb., 1_0mlx.") +(exclusion_set "1_0mfi." + "1_0mmf., 1_0bbb., 1_0mbb., 1_0mib., 1_0mmb., 1_0mfb., 1_0mlx.") +(exclusion_set "1_0mmf." + "1_0bbb., 1_0mbb., 1_0mib., 1_0mmb., 1_0mfb., 1_0mlx.") +(exclusion_set "1_0bbb." "1_0mbb., 1_0mib., 1_0mmb., 1_0mfb., 1_0mlx.") +(exclusion_set "1_0mbb." "1_0mib., 1_0mmb., 1_0mfb., 1_0mlx.") +(exclusion_set "1_0mib." "1_0mmb., 1_0mfb., 1_0mlx.") +(exclusion_set "1_0mmb." 
"1_0mfb., 1_0mlx.") +(exclusion_set "1_0mfb." "1_0mlx.") + +;; Slot 4 +(exclusion_set "1_1m.ii" + "1_1m.mi, 1_1m.fi, 1_1b.bb, 1_1m.bb, 1_1m.ib, 1_1m.mb, 1_1m.fb, 1_1m.lx") +(exclusion_set "1_1m.mi" + "1_1m.fi, 1_1b.bb, 1_1m.bb, 1_1m.ib, 1_1m.mb, 1_1m.fb, 1_1m.lx") +(exclusion_set "1_1m.fi" + "1_1b.bb, 1_1m.bb, 1_1m.ib, 1_1m.mb, 1_1m.fb, 1_1m.lx") +(exclusion_set "1_1b.bb" "1_1m.bb, 1_1m.ib, 1_1m.mb, 1_1m.fb, 1_1m.lx") +(exclusion_set "1_1m.bb" "1_1m.ib, 1_1m.mb, 1_1m.fb, 1_1m.lx") +(exclusion_set "1_1m.ib" "1_1m.mb, 1_1m.fb, 1_1m.lx") +(exclusion_set "1_1m.mb" "1_1m.fb, 1_1m.lx") +(exclusion_set "1_1m.fb" "1_1m.lx") + +;; Slot 5 +(exclusion_set "1_1mi.i" + "1_1mm.i, 1_1mf.i, 1_1bb.b, 1_1mb.b, 1_1mi.b, 1_1mm.b, 1_1mf.b, 1_1mlx.") +(exclusion_set "1_1mm.i" + "1_1mf.i, 1_1bb.b, 1_1mb.b, 1_1mi.b, 1_1mm.b, 1_1mf.b, 1_1mlx.") +(exclusion_set "1_1mf.i" + "1_1bb.b, 1_1mb.b, 1_1mi.b, 1_1mm.b, 1_1mf.b, 1_1mlx.") +(exclusion_set "1_1bb.b" "1_1mb.b, 1_1mi.b, 1_1mm.b, 1_1mf.b, 1_1mlx.") +(exclusion_set "1_1mb.b" "1_1mi.b, 1_1mm.b, 1_1mf.b, 1_1mlx.") +(exclusion_set "1_1mi.b" "1_1mm.b, 1_1mf.b, 1_1mlx.") +(exclusion_set "1_1mm.b" "1_1mf.b, 1_1mlx.") +(exclusion_set "1_1mf.b" "1_1mlx.") + +;; Slot 6 +(exclusion_set "1_1mii." + "1_1mmi., 1_1mfi., 1_1bbb., 1_1mbb., 1_1mib., 1_1mmb., 1_1mfb., 1_1mlx.") +(exclusion_set "1_1mmi." + "1_1mfi., 1_1bbb., 1_1mbb., 1_1mib., 1_1mmb., 1_1mfb., 1_1mlx.") +(exclusion_set "1_1mfi." + "1_1bbb., 1_1mbb., 1_1mib., 1_1mmb., 1_1mfb., 1_1mlx.") +(exclusion_set "1_1bbb." "1_1mbb., 1_1mib., 1_1mmb., 1_1mfb., 1_1mlx.") +(exclusion_set "1_1mbb." "1_1mib., 1_1mmb., 1_1mfb., 1_1mlx.") +(exclusion_set "1_1mib." "1_1mmb., 1_1mfb., 1_1mlx.") +(exclusion_set "1_1mmb." "1_1mfb., 1_1mlx.") +(exclusion_set "1_1mfb." "1_1mlx.") + +(final_presence_set "1_0mi.i" "1_0m.ii") +(final_presence_set "1_0mii." "1_0mi.i") +(final_presence_set "1_1mi.i" "1_1m.ii") +(final_presence_set "1_1mii." 
"1_1mi.i") + +(final_presence_set "1_0mm.i" "1_0m.mi") +(final_presence_set "1_0mmi." "1_0mm.i") +(final_presence_set "1_1mm.i" "1_1m.mi") +(final_presence_set "1_1mmi." "1_1mm.i") + +(final_presence_set "1_0mf.i" "1_0m.fi") +(final_presence_set "1_0mfi." "1_0mf.i") +(final_presence_set "1_1mf.i" "1_1m.fi") +(final_presence_set "1_1mfi." "1_1mf.i") + +(final_presence_set "1_0mm.f" "1_0m.mf") +(final_presence_set "1_0mmf." "1_0mm.f") + +(final_presence_set "1_0bb.b" "1_0b.bb") +(final_presence_set "1_0bbb." "1_0bb.b") +(final_presence_set "1_1bb.b" "1_1b.bb") +(final_presence_set "1_1bbb." "1_1bb.b") + +(final_presence_set "1_0mb.b" "1_0m.bb") +(final_presence_set "1_0mbb." "1_0mb.b") +(final_presence_set "1_1mb.b" "1_1m.bb") +(final_presence_set "1_1mbb." "1_1mb.b") + +(final_presence_set "1_0mi.b" "1_0m.ib") +(final_presence_set "1_0mib." "1_0mi.b") +(final_presence_set "1_1mi.b" "1_1m.ib") +(final_presence_set "1_1mib." "1_1mi.b") + +(final_presence_set "1_0mm.b" "1_0m.mb") +(final_presence_set "1_0mmb." "1_0mm.b") +(final_presence_set "1_1mm.b" "1_1m.mb") +(final_presence_set "1_1mmb." "1_1mm.b") + +(final_presence_set "1_0mf.b" "1_0m.fb") +(final_presence_set "1_0mfb." "1_0mf.b") +(final_presence_set "1_1mf.b" "1_1m.fb") +(final_presence_set "1_1mfb." "1_1mf.b") + +(final_presence_set "1_0mlx." "1_0m.lx") +(final_presence_set "1_1mlx." "1_1m.lx") + +(final_presence_set + "1_1m.ii,1_1m.mi,1_1m.fi,1_1b.bb,1_1m.bb,1_1m.ib,1_1m.mb,1_1m.fb,1_1m.lx" + "1_0mii.,1_0mmi.,1_0mfi.,1_0mmf.,1_0bbb.,1_0mbb.,1_0mib.,1_0mmb.,1_0mfb.,\ + 1_0mlx.") + +;; Microarchitecture units: +(define_cpu_unit + "1_um0, 1_um1, 1_ui0, 1_ui1, 1_uf0, 1_uf1, 1_ub0, 1_ub1, 1_ub2,\ + 1_unb0, 1_unb1, 1_unb2" "one") + +(exclusion_set "1_ub0" "1_unb0") +(exclusion_set "1_ub1" "1_unb1") +(exclusion_set "1_ub2" "1_unb2") + +;; The following rules are used to decrease number of alternatives. +;; They are consequences of Itanium microarchitecture. 
They also +;; describe the following rules mentioned in Itanium +;; microarchitecture: +;; o "MMF: Always splits issue before the first M and after F regardless +;; of surrounding bundles and stops". +;; o "BBB/MBB: Always splits issue after either of these bundles". +;; o "MIB BBB: Split issue after the first bundle in this pair". + +(exclusion_set "1_0m.mf,1_0mm.f,1_0mmf." + "1_1m.ii,1_1m.mi,1_1m.fi,1_1b.bb,1_1m.bb,1_1m.ib,1_1m.mb,1_1m.fb,1_1m.lx") +(exclusion_set "1_0b.bb,1_0bb.b,1_0bbb.,1_0m.bb,1_0mb.b,1_0mbb." + "1_1m.ii,1_1m.mi,1_1m.fi,1_1b.bb,1_1m.bb,1_1m.ib,1_1m.mb,1_1m.fb,1_1m.lx") +(exclusion_set "1_0m.ib,1_0mi.b,1_0mib." "1_1b.bb") + +;; For exceptions of M, I, B, F insns: +(define_cpu_unit "1_not_um1, 1_not_ui1, 1_not_uf1" "one") + +(final_absence_set "1_not_um1" "1_um1") +(final_absence_set "1_not_ui1" "1_ui1") +(final_absence_set "1_not_uf1" "1_uf1") + +;;; "MIB/MFB/MMB: Splits issue after any of these bundles unless the +;;; B-slot contains a nop.b or a brp instruction". +;;; "The B in an MIB/MFB/MMB bundle disperses to B0 if it is a brp or +;;; nop.b, otherwise it disperses to B2". +(final_absence_set + "1_1m.ii, 1_1m.mi, 1_1m.fi, 1_1b.bb, 1_1m.bb, 1_1m.ib, 1_1m.mb, 1_1m.fb,\ + 1_1m.lx" + "1_0mib. 1_ub2, 1_0mfb. 1_ub2, 1_0mmb. 1_ub2") + +;; This is necessary to start a new processor cycle when we meet a stop bit. +(define_cpu_unit "1_stop" "one") +(final_absence_set + "1_0m.ii,1_0mi.i,1_0mii.,1_0m.mi,1_0mm.i,1_0mmi.,1_0m.fi,1_0mf.i,1_0mfi.,\ + 1_0m.mf,1_0mm.f,1_0mmf.,1_0b.bb,1_0bb.b,1_0bbb.,1_0m.bb,1_0mb.b,1_0mbb.,\ + 1_0m.ib,1_0mi.b,1_0mib.,1_0m.mb,1_0mm.b,1_0mmb.,1_0m.fb,1_0mf.b,1_0mfb.,\ + 1_0m.lx,1_0mlx., \ + 1_1m.ii,1_1mi.i,1_1mii.,1_1m.mi,1_1mm.i,1_1mmi.,1_1m.fi,1_1mf.i,1_1mfi.,\ + 1_1b.bb,1_1bb.b,1_1bbb.,1_1m.bb,1_1mb.b,1_1mbb.,1_1m.ib,1_1mi.b,1_1mib.,\ + 1_1m.mb,1_1mm.b,1_1mmb.,1_1m.fb,1_1mf.b,1_1mfb.,1_1m.lx,1_1mlx." 
+ "1_stop") + +;; M and I instruction is dispersed to the lowest numbered M or I unit +;; not already in use. An I slot in the 3rd position of 2nd bundle is +;; always dispersed to I1 +(final_presence_set "1_um1" "1_um0") +(final_presence_set "1_ui1" "1_ui0, 1_1mii., 1_1mmi., 1_1mfi.") + +;; Insns + +;; M and I instruction is dispersed to the lowest numbered M or I unit +;; not already in use. An I slot in the 3rd position of 2nd bundle is +;; always dispersed to I1 +(define_reservation "1_M0" + "1_0m.ii+1_um0|1_0m.mi+1_um0|1_0mm.i+(1_um0|1_um1)\ + |1_0m.fi+1_um0|1_0m.mf+1_um0|1_0mm.f+1_um1\ + |1_0m.bb+1_um0|1_0m.ib+1_um0|1_0m.mb+1_um0\ + |1_0mm.b+1_um1|1_0m.fb+1_um0|1_0m.lx+1_um0\ + |1_1mm.i+1_um1|1_1mm.b+1_um1\ + |(1_1m.ii|1_1m.mi|1_1m.fi|1_1m.bb|1_1m.ib|1_1m.mb|1_1m.fb|1_1m.lx)\ + +(1_um0|1_um1)") + +(define_reservation "1_M1" + "(1_0mii.+(1_ui0|1_ui1)|1_0mmi.+1_ui0|1_0mfi.+1_ui0\ + |1_0mib.+1_unb0|1_0mfb.+1_unb0|1_0mmb.+1_unb0)\ + +(1_1m.ii|1_1m.mi|1_1m.fi|1_1m.bb|1_1m.ib|1_1m.mb|1_1m.fb|1_1m.lx)\ + +(1_um0|1_um1)") + +(define_reservation "1_M" "1_M0|1_M1") + +;; Exceptions for dispersal rules. +;; "An I slot in the 3rd position of 2nd bundle is always dispersed to I1". +(define_reservation "1_I0" + "1_0mi.i+1_ui0|1_0mii.+(1_ui0|1_ui1)|1_0mmi.+1_ui0|1_0mfi.+1_ui0\ + |1_0mi.b+1_ui0|(1_1mi.i|1_1mi.b)+(1_ui0|1_ui1)\ + |1_1mii.+1_ui1|1_1mmi.+1_ui1|1_1mfi.+1_ui1") + +(define_reservation "1_I1" + "1_0m.ii+1_um0+1_0mi.i+1_ui0|1_0mm.i+(1_um0|1_um1)+1_0mmi.+1_ui0\ + |1_0mf.i+1_uf0+1_0mfi.+1_ui0|1_0m.ib+1_um0+1_0mi.b+1_ui0\ + |(1_1m.ii+(1_um0|1_um1)+1_1mi.i\ + |1_1m.ib+(1_um0|1_um1)+1_1mi.b)+(1_ui0|1_ui1)\ + |1_1mm.i+1_um1+1_1mmi.+1_ui1|1_1mf.i+1_uf1+1_1mfi.+1_ui1") + +(define_reservation "1_I" "1_I0|1_I1") + +;; "An F slot in the 1st bundle disperses to F0". +;; "An F slot in the 2st bundle disperses to F1". 
+(define_reservation "1_F0" + "1_0mf.i+1_uf0|1_0mmf.+1_uf0|1_0mf.b+1_uf0|1_1mf.i+1_uf1|1_1mf.b+1_uf1") + +(define_reservation "1_F1" + "1_0m.fi+1_um0+1_0mf.i+1_uf0|1_0mm.f+(1_um0|1_um1)+1_0mmf.+1_uf0\ + |1_0m.fb+1_um0+1_0mf.b+1_uf0|1_1m.fi+(1_um0|1_um1)+1_1mf.i+1_uf1\ + |1_1m.fb+(1_um0|1_um1)+1_1mf.b+1_uf1") + +(define_reservation "1_F2" + "1_0m.mf+1_um0+1_0mm.f+1_um1+1_0mmf.+1_uf0\ + |(1_0mii.+(1_ui0|1_ui1)|1_0mmi.+1_ui0|1_0mfi.+1_ui0\ + |1_0mib.+1_unb0|1_0mmb.+1_unb0|1_0mfb.+1_unb0)\ + +(1_1m.fi+(1_um0|1_um1)+1_1mf.i+1_uf1\ + |1_1m.fb+(1_um0|1_um1)+1_1mf.b+1_uf1)") + +(define_reservation "1_F" "1_F0|1_F1|1_F2") + +;;; "Each B slot in MBB or BBB bundle disperses to the corresponding B +;;; unit. That is, a B slot in 1st position is despersed to B0. In the +;;; 2nd position it is dispersed to B2". +(define_reservation "1_NB" + "1_0b.bb+1_unb0|1_0bb.b+1_unb1|1_0bbb.+1_unb2\ + |1_0mb.b+1_unb1|1_0mbb.+1_unb2\ + |1_0mib.+1_unb0|1_0mmb.+1_unb0|1_0mfb.+1_unb0\ + |1_1b.bb+1_unb0|1_1bb.b+1_unb1\ + |1_1bbb.+1_unb2|1_1mb.b+1_unb1|1_1mbb.+1_unb2|1_1mib.+1_unb0\ + |1_1mmb.+1_unb0|1_1mfb.+1_unb0") + +(define_reservation "1_B0" + "1_0b.bb+1_ub0|1_0bb.b+1_ub1|1_0bbb.+1_ub2\ + |1_0mb.b+1_ub1|1_0mbb.+1_ub2|1_0mib.+1_ub2\ + |1_0mfb.+1_ub2|1_1b.bb+1_ub0|1_1bb.b+1_ub1\ + |1_1bbb.+1_ub2|1_1mb.b+1_ub1\ + |1_1mib.+1_ub2|1_1mmb.+1_ub2|1_1mfb.+1_ub2") + +(define_reservation "1_B1" + "1_0m.bb+1_um0+1_0mb.b+1_ub1|1_0mi.b+1_ui0+1_0mib.+1_ub2\ + |1_0mf.b+1_uf0+1_0mfb.+1_ub2\ + |(1_0mii.+(1_ui0|1_ui1)|1_0mmi.+1_ui0|1_0mfi.+1_ui0)+1_1b.bb+1_ub0\ + |1_1m.bb+(1_um0|1_um1)+1_1mb.b+1_ub1\ + |1_1mi.b+(1_ui0|1_ui1)+1_1mib.+1_ub2\ + |1_1mm.b+1_um1+1_1mmb.+1_ub2\ + |1_1mf.b+1_uf1+1_1mfb.+1_ub2") + +(define_reservation "1_B" "1_B0|1_B1") + +;; MLX bunlde uses ports equivalent to MFI bundles. 
+(define_reservation "1_L0" "1_0mlx.+1_ui0+1_uf0|1_1mlx.+(1_ui0|1_ui1)+1_uf1") +(define_reservation "1_L1" + "1_0m.lx+1_um0+1_0mlx.+1_ui0+1_uf0\ + |1_1m.lx+(1_um0|1_um1)+1_1mlx.+(1_ui0|1_ui1)+1_uf1") +(define_reservation "1_L2" + "(1_0mii.+(1_ui0|1_ui1)|1_0mmi.+1_ui0|1_0mfi.+1_ui0\ + |1_0mib.+1_unb0|1_0mmb.+1_unb0|1_0mfb.+1_unb0) + +1_1m.lx+(1_um0|1_um1)+1_1mlx.+1_ui1+1_uf1") +(define_reservation "1_L" "1_L0|1_L1|1_L2") + +(define_reservation "1_A" "1_M|1_I") + +(define_insn_reservation "1_stop_bit" 0 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "stop_bit")) + (eq (symbol_ref "bundling_p") (const_int 0))) + "1_stop|1_m0_stop|1_m1_stop|1_mi0_stop|1_mi1_stop") + +(define_insn_reservation "1_br" 0 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "br")) + (eq (symbol_ref "bundling_p") (const_int 0))) "1_B") +(define_insn_reservation "1_scall" 0 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "scall")) + (eq (symbol_ref "bundling_p") (const_int 0))) "1_B") +(define_insn_reservation "1_fcmp" 2 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "fcmp")) + (eq (symbol_ref "bundling_p") (const_int 0))) + "1_F+1_not_uf1") +(define_insn_reservation "1_fcvtfx" 7 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "fcvtfx")) + (eq (symbol_ref "bundling_p") (const_int 0))) "1_F") +(define_insn_reservation "1_fld" 9 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "fld")) + (eq (symbol_ref "bundling_p") (const_int 0))) "1_M") +(define_insn_reservation "1_fmac" 5 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "fmac")) + (eq (symbol_ref "bundling_p") (const_int 0))) "1_F") +(define_insn_reservation "1_fmisc" 5 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "fmisc")) + (eq (symbol_ref "bundling_p") (const_int 0))) + "1_F+1_not_uf1") + +;; There is only one insn `mov = ar.bsp' for frar_i: +(define_insn_reservation "1_frar_i" 13 + (and (and (eq_attr "cpu" 
"itanium") + (eq_attr "itanium_class" "frar_i")) + (eq (symbol_ref "bundling_p") (const_int 0))) + "1_I+1_not_ui1") +;; There is only two insns `mov = ar.unat' or `mov = ar.ccv' for frar_m: +(define_insn_reservation "1_frar_m" 6 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "frar_m")) + (eq (symbol_ref "bundling_p") (const_int 0))) + "1_M+1_not_um1") +(define_insn_reservation "1_frbr" 2 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "frbr")) + (eq (symbol_ref "bundling_p") (const_int 0))) + "1_I+1_not_ui1") +(define_insn_reservation "1_frfr" 2 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "frfr")) + (eq (symbol_ref "bundling_p") (const_int 0))) + "1_M+1_not_um1") +(define_insn_reservation "1_frpr" 2 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "frpr")) + (eq (symbol_ref "bundling_p") (const_int 0))) + "1_I+1_not_ui1") + +(define_insn_reservation "1_ialu" 1 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "ialu")) + (eq (symbol_ref + "bundling_p || ia64_produce_address_p (insn)") + (const_int 0))) + "1_A") +(define_insn_reservation "1_ialu_addr" 1 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "ialu")) + (eq (symbol_ref + "!bundling_p && ia64_produce_address_p (insn)") + (const_int 1))) + "1_M") +(define_insn_reservation "1_icmp" 1 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "icmp")) + (eq (symbol_ref "bundling_p") (const_int 0))) "1_A") +(define_insn_reservation "1_ilog" 1 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "ilog")) + (eq (symbol_ref "bundling_p") (const_int 0))) "1_A") +(define_insn_reservation "1_ishf" 1 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "ishf")) + (eq (symbol_ref "bundling_p") (const_int 0))) + "1_I+1_not_ui1") +(define_insn_reservation "1_ld" 2 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "ld")) + (eq (symbol_ref "bundling_p") (const_int 0))) "1_M") 
+(define_insn_reservation "1_long_i" 1 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "long_i")) + (eq (symbol_ref "bundling_p") (const_int 0))) "1_L") +(define_insn_reservation "1_mmmul" 2 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "mmmul")) + (eq (symbol_ref "bundling_p") (const_int 0))) + "1_I+1_not_ui1") +(define_insn_reservation "1_mmshf" 2 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "mmshf")) + (eq (symbol_ref "bundling_p") (const_int 0))) "1_I") +(define_insn_reservation "1_mmshfi" 1 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "mmshfi")) + (eq (symbol_ref "bundling_p") (const_int 0))) "1_I") + +;; Now we have only one insn (flushrs) of such class. We assume that flushrs +;; is the 1st syllable of the bundle after stop bit. +(define_insn_reservation "1_rse_m" 0 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "rse_m")) + (eq (symbol_ref "bundling_p") (const_int 0))) + "(1_0m.ii|1_0m.mi|1_0m.fi|1_0m.mf|1_0b.bb|1_0m.bb\ + |1_0m.ib|1_0m.mb|1_0m.fb|1_0m.lx)+1_um0") +(define_insn_reservation "1_sem" 0 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "sem")) + (eq (symbol_ref "bundling_p") (const_int 0))) + "1_M+1_not_um1") +(define_insn_reservation "1_stf" 1 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "stf")) + (eq (symbol_ref "bundling_p") (const_int 0))) "1_M") +(define_insn_reservation "1_st" 1 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "st")) + (eq (symbol_ref "bundling_p") (const_int 0))) "1_M") +(define_insn_reservation "1_syst_m0" 0 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "syst_m0")) + (eq (symbol_ref "bundling_p") (const_int 0))) + "1_M+1_not_um1") +(define_insn_reservation "1_syst_m" 0 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "syst_m")) + (eq (symbol_ref "bundling_p") (const_int 0))) "1_M") +(define_insn_reservation "1_tbit" 1 + (and (and (eq_attr "cpu" 
"itanium") + (eq_attr "itanium_class" "tbit")) + (eq (symbol_ref "bundling_p") (const_int 0))) + "1_I+1_not_ui1") + +;; There is only ony insn `mov ar.pfs =' for toar_i: +(define_insn_reservation "1_toar_i" 0 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "toar_i")) + (eq (symbol_ref "bundling_p") (const_int 0))) + "1_I+1_not_ui1") +;; There are only ony 2 insns `mov ar.ccv =' and `mov ar.unat =' for toar_m: +(define_insn_reservation "1_toar_m" 5 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "toar_m")) + (eq (symbol_ref "bundling_p") (const_int 0))) + "1_M+1_not_um1") +(define_insn_reservation "1_tobr" 1 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "tobr")) + (eq (symbol_ref "bundling_p") (const_int 0))) + "1_I+1_not_ui1") +(define_insn_reservation "1_tofr" 9 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "tofr")) + (eq (symbol_ref "bundling_p") (const_int 0))) "1_M") +(define_insn_reservation "1_topr" 1 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "topr")) + (eq (symbol_ref "bundling_p") (const_int 0))) + "1_I+1_not_ui1") +(define_insn_reservation "1_xmpy" 7 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "xmpy")) + (eq (symbol_ref "bundling_p") (const_int 0))) "1_F") +(define_insn_reservation "1_xtd" 1 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "xtd")) + (eq (symbol_ref "bundling_p") (const_int 0))) "1_I") + +(define_insn_reservation "1_chk_s" 0 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "chk_s")) + (eq (symbol_ref "bundling_p") (const_int 0))) "1_A") +(define_insn_reservation "1_lfetch" 0 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "lfetch")) + (eq (symbol_ref "bundling_p") (const_int 0))) "1_M") + +(define_insn_reservation "1_nop_m" 0 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "nop_m")) + (eq (symbol_ref "bundling_p") (const_int 0))) "1_M0") 
+(define_insn_reservation "1_nop_b" 0 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "nop_b")) + (eq (symbol_ref "bundling_p") (const_int 0))) "1_NB") +(define_insn_reservation "1_nop_i" 0 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "nop_i")) + (eq (symbol_ref "bundling_p") (const_int 0))) "1_I0") +(define_insn_reservation "1_nop_f" 0 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "nop_f")) + (eq (symbol_ref "bundling_p") (const_int 0))) "1_F0") +(define_insn_reservation "1_nop_x" 0 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "nop_x")) + (eq (symbol_ref "bundling_p") (const_int 0))) "1_L0") + +;; We assume that there is no insn issued on the same cycle as unknown insn. +(define_cpu_unit "1_empty" "one") +(exclusion_set "1_empty" + "1_0m.ii,1_0m.mi,1_0m.fi,1_0m.mf,1_0b.bb,1_0m.bb,1_0m.ib,1_0m.mb,1_0m.fb,\ + 1_0m.lx") + +(define_insn_reservation "1_unknown" 1 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "unknown")) + (eq (symbol_ref "bundling_p") (const_int 0))) "1_empty") + +(define_insn_reservation "1_nop" 1 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "nop")) + (eq (symbol_ref "bundling_p") (const_int 0))) + "1_M0|1_NB|1_I0|1_F0") + +(define_insn_reservation "1_ignore" 0 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "ignore")) + (eq (symbol_ref "bundling_p") (const_int 0))) "nothing") + + +(define_cpu_unit + "1_0m_bs, 1_0mi_bs, 1_0mm_bs, 1_0mf_bs, 1_0b_bs, 1_0bb_bs, 1_0mb_bs" + "one") +(define_cpu_unit + "1_1m_bs, 1_1mi_bs, 1_1mm_bs, 1_1mf_bs, 1_1b_bs, 1_1bb_bs, 1_1mb_bs" + "one") + +(define_cpu_unit "1_m_cont, 1_mi_cont, 1_mm_cont, 1_mf_cont, 1_mb_cont,\ + 1_b_cont, 1_bb_cont" "one") + +;; For stop in the middle of the bundles. 
+(define_cpu_unit "1_m_stop, 1_m0_stop, 1_m1_stop, 1_0mmi_cont" "one") +(define_cpu_unit "1_mi_stop, 1_mi0_stop, 1_mi1_stop, 1_0mii_cont" "one") + +(final_presence_set "1_0m_bs" + "1_0m.ii, 1_0m.mi, 1_0m.mf, 1_0m.fi, 1_0m.bb,\ + 1_0m.ib, 1_0m.fb, 1_0m.mb, 1_0m.lx") +(final_presence_set "1_1m_bs" + "1_1m.ii, 1_1m.mi, 1_1m.fi, 1_1m.bb, 1_1m.ib, 1_1m.fb, 1_1m.mb,\ + 1_1m.lx") +(final_presence_set "1_0mi_bs" "1_0mi.i, 1_0mi.i") +(final_presence_set "1_1mi_bs" "1_1mi.i, 1_1mi.i") +(final_presence_set "1_0mm_bs" "1_0mm.i, 1_0mm.f, 1_0mm.b") +(final_presence_set "1_1mm_bs" "1_1mm.i, 1_1mm.b") +(final_presence_set "1_0mf_bs" "1_0mf.i, 1_0mf.b") +(final_presence_set "1_1mf_bs" "1_1mf.i, 1_1mf.b") +(final_presence_set "1_0b_bs" "1_0b.bb") +(final_presence_set "1_1b_bs" "1_1b.bb") +(final_presence_set "1_0bb_bs" "1_0bb.b") +(final_presence_set "1_1bb_bs" "1_1bb.b") +(final_presence_set "1_0mb_bs" "1_0mb.b") +(final_presence_set "1_1mb_bs" "1_1mb.b") + +(exclusion_set "1_0m_bs" + "1_0mi.i, 1_0mm.i, 1_0mm.f, 1_0mf.i, 1_0mb.b,\ + 1_0mi.b, 1_0mf.b, 1_0mm.b, 1_0mlx., 1_m0_stop") +(exclusion_set "1_1m_bs" + "1_1mi.i, 1_1mm.i, 1_1mf.i, 1_1mb.b, 1_1mi.b, 1_1mf.b, 1_1mm.b,\ + 1_1mlx., 1_m1_stop") +(exclusion_set "1_0mi_bs" "1_0mii., 1_0mib., 1_mi0_stop") +(exclusion_set "1_1mi_bs" "1_1mii., 1_1mib., 1_mi1_stop") +(exclusion_set "1_0mm_bs" "1_0mmi., 1_0mmf., 1_0mmb.") +(exclusion_set "1_1mm_bs" "1_1mmi., 1_1mmb.") +(exclusion_set "1_0mf_bs" "1_0mfi., 1_0mfb.") +(exclusion_set "1_1mf_bs" "1_1mfi., 1_1mfb.") +(exclusion_set "1_0b_bs" "1_0bb.b") +(exclusion_set "1_1b_bs" "1_1bb.b") +(exclusion_set "1_0bb_bs" "1_0bbb.") +(exclusion_set "1_1bb_bs" "1_1bbb.") +(exclusion_set "1_0mb_bs" "1_0mbb.") +(exclusion_set "1_1mb_bs" "1_1mbb.") + +(exclusion_set + "1_0m_bs, 1_0mi_bs, 1_0mm_bs, 1_0mf_bs, 1_0b_bs, 1_0bb_bs, 1_0mb_bs, + 1_1m_bs, 1_1mi_bs, 1_1mm_bs, 1_1mf_bs, 1_1b_bs, 1_1bb_bs, 1_1mb_bs" + "1_stop") + +(final_presence_set + "1_0mi.i, 1_0mm.i, 1_0mf.i, 1_0mm.f, 1_0mb.b,\ + 1_0mi.b, 1_0mm.b, 
1_0mf.b, 1_0mlx." + "1_m_cont") +(final_presence_set "1_0mii., 1_0mib." "1_mi_cont") +(final_presence_set "1_0mmi., 1_0mmf., 1_0mmb." "1_mm_cont") +(final_presence_set "1_0mfi., 1_0mfb." "1_mf_cont") +(final_presence_set "1_0bb.b" "1_b_cont") +(final_presence_set "1_0bbb." "1_bb_cont") +(final_presence_set "1_0mbb." "1_mb_cont") + +(exclusion_set + "1_0m.ii, 1_0m.mi, 1_0m.fi, 1_0m.mf, 1_0b.bb, 1_0m.bb,\ + 1_0m.ib, 1_0m.mb, 1_0m.fb, 1_0m.lx" + "1_m_cont, 1_mi_cont, 1_mm_cont, 1_mf_cont,\ + 1_mb_cont, 1_b_cont, 1_bb_cont") + +(exclusion_set "1_empty" + "1_m_cont,1_mi_cont,1_mm_cont,1_mf_cont,\ + 1_mb_cont,1_b_cont,1_bb_cont") + +;; For m;mi bundle +(final_presence_set "1_m0_stop" "1_0m.mi") +(final_presence_set "1_0mm.i" "1_0mmi_cont") +(exclusion_set "1_0mmi_cont" + "1_0m.ii, 1_0m.mi, 1_0m.fi, 1_0m.mf, 1_0b.bb, 1_0m.bb,\ + 1_0m.ib, 1_0m.mb, 1_0m.fb, 1_0m.lx") +(exclusion_set "1_m0_stop" "1_0mm.i") +(final_presence_set "1_m1_stop" "1_1m.mi") +(exclusion_set "1_m1_stop" "1_1mm.i") +(final_presence_set "1_m_stop" "1_m0_stop, 1_m1_stop") + +;; For mi;i bundle +(final_presence_set "1_mi0_stop" "1_0mi.i") +(final_presence_set "1_0mii." "1_0mii_cont") +(exclusion_set "1_0mii_cont" + "1_0m.ii, 1_0m.mi, 1_0m.fi, 1_0m.mf, 1_0b.bb, 1_0m.bb,\ + 1_0m.ib, 1_0m.mb, 1_0m.fb, 1_0m.lx") +(exclusion_set "1_mi0_stop" "1_0mii.") +(final_presence_set "1_mi1_stop" "1_1mi.i") +(exclusion_set "1_mi1_stop" "1_1mii.") +(final_presence_set "1_mi_stop" "1_mi0_stop, 1_mi1_stop") + +(final_absence_set + "1_0m.ii,1_0mi.i,1_0mii.,1_0m.mi,1_0mm.i,1_0mmi.,1_0m.fi,1_0mf.i,1_0mfi.,\ + 1_0m.mf,1_0mm.f,1_0mmf.,1_0b.bb,1_0bb.b,1_0bbb.,1_0m.bb,1_0mb.b,1_0mbb.,\ + 1_0m.ib,1_0mi.b,1_0mib.,1_0m.mb,1_0mm.b,1_0mmb.,1_0m.fb,1_0mf.b,1_0mfb.,\ + 1_0m.lx,1_0mlx., \ + 1_1m.ii,1_1mi.i,1_1mii.,1_1m.mi,1_1mm.i,1_1mmi.,1_1m.fi,1_1mf.i,1_1mfi.,\ + 1_1b.bb,1_1bb.b,1_1bbb.,1_1m.bb,1_1mb.b,1_1mbb.,\ + 1_1m.ib,1_1mi.b,1_1mib.,1_1m.mb,1_1mm.b,1_1mmb.,1_1m.fb,1_1mf.b,1_1mfb.,\ + 1_1m.lx,1_1mlx." 
+ "1_m0_stop,1_m1_stop,1_mi0_stop,1_mi1_stop") + +(define_cpu_unit "1_m_cont_only, 1_b_cont_only" "one") +(define_cpu_unit "1_mi_cont_only, 1_mm_cont_only, 1_mf_cont_only" "one") +(define_cpu_unit "1_mb_cont_only, 1_bb_cont_only" "one") + +(final_presence_set "1_m_cont_only" "1_m_cont") +(exclusion_set "1_m_cont_only" + "1_0mi.i, 1_0mm.i, 1_0mf.i, 1_0mm.f, 1_0mb.b,\ + 1_0mi.b, 1_0mm.b, 1_0mf.b, 1_0mlx.") + +(final_presence_set "1_b_cont_only" "1_b_cont") +(exclusion_set "1_b_cont_only" "1_0bb.b") + +(final_presence_set "1_mi_cont_only" "1_mi_cont") +(exclusion_set "1_mi_cont_only" "1_0mii., 1_0mib.") + +(final_presence_set "1_mm_cont_only" "1_mm_cont") +(exclusion_set "1_mm_cont_only" "1_0mmi., 1_0mmf., 1_0mmb.") + +(final_presence_set "1_mf_cont_only" "1_mf_cont") +(exclusion_set "1_mf_cont_only" "1_0mfi., 1_0mfb.") + +(final_presence_set "1_mb_cont_only" "1_mb_cont") +(exclusion_set "1_mb_cont_only" "1_0mbb.") + +(final_presence_set "1_bb_cont_only" "1_bb_cont") +(exclusion_set "1_bb_cont_only" "1_0bbb.") + +(define_insn_reservation "1_pre_cycle" 0 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "pre_cycle")) + (eq (symbol_ref "bundling_p") (const_int 0))) + "(1_0m_bs, 1_m_cont) \ + | (1_0mi_bs, (1_mi_cont|nothing)) \ + | (1_0mm_bs, 1_mm_cont) \ + | (1_0mf_bs, (1_mf_cont|nothing)) \ + | (1_0b_bs, (1_b_cont|nothing)) \ + | (1_0bb_bs, (1_bb_cont|nothing)) \ + | (1_0mb_bs, (1_mb_cont|nothing)) \ + | (1_1m_bs, 1_m_cont) \ + | (1_1mi_bs, (1_mi_cont|nothing)) \ + | (1_1mm_bs, 1_mm_cont) \ + | (1_1mf_bs, (1_mf_cont|nothing)) \ + | (1_1b_bs, (1_b_cont|nothing)) \ + | (1_1bb_bs, (1_bb_cont|nothing)) \ + | (1_1mb_bs, (1_mb_cont|nothing)) \ + | (1_m_cont_only, (1_m_cont|nothing)) \ + | (1_b_cont_only, (1_b_cont|nothing)) \ + | (1_mi_cont_only, (1_mi_cont|nothing)) \ + | (1_mm_cont_only, (1_mm_cont|nothing)) \ + | (1_mf_cont_only, (1_mf_cont|nothing)) \ + | (1_mb_cont_only, (1_mb_cont|nothing)) \ + | (1_bb_cont_only, (1_bb_cont|nothing)) \ + | (1_m_stop, 
(1_0mmi_cont|nothing)) \ + | (1_mi_stop, (1_0mii_cont|nothing))") + +;; Bypasses: +(define_bypass 1 "1_fcmp" "1_br,1_scall") +;; ??? I found 7 cycle dealy for 1_fmac -> 1_fcmp for Itanium1 +(define_bypass 7 "1_fmac" "1_fmisc,1_fcvtfx,1_xmpy,1_fcmp") + +;; ??? +(define_bypass 3 "1_frbr" "1_mmmul,1_mmshf") +(define_bypass 14 "1_frar_i" "1_mmmul,1_mmshf") +(define_bypass 7 "1_frar_m" "1_mmmul,1_mmshf") + +;; ???? +;; There is only one insn `mov ar.pfs =' for toar_i. +(define_bypass 0 "1_tobr,1_topr,1_toar_i" "1_br,1_scall") + +(define_bypass 3 "1_ialu,1_ialu_addr" "1_mmmul,1_mmshf") +;; ??? howto describe ialu for I slot only. We use ialu_addr for that +;;(define_bypass 2 "1_ialu" "1_ld" "ia64_ld_address_bypass_p") +;; ??? howto describe ialu st/address for I slot only. We use ialu_addr +;; for that. +;;(define_bypass 2 "1_ialu" "1_st" "ia64_st_address_bypass_p") + +(define_bypass 0 "1_icmp" "1_br,1_scall") + +(define_bypass 3 "1_ilog" "1_mmmul,1_mmshf") + +(define_bypass 2 "1_ilog,1_xtd" "1_ld" "ia64_ld_address_bypass_p") +(define_bypass 2 "1_ilog,1_xtd" "1_st" "ia64_st_address_bypass_p") + +(define_bypass 3 "1_ld" "1_mmmul,1_mmshf") +(define_bypass 3 "1_ld" "1_ld" "ia64_ld_address_bypass_p") +(define_bypass 3 "1_ld" "1_st" "ia64_st_address_bypass_p") + +;; Intel docs say only LD, ST, IALU, ILOG, ISHF consumers have latency 4, +;; but HP engineers say any non-MM operation. +(define_bypass 4 "1_mmmul,1_mmshf" + "1_br,1_fcmp,1_fcvtfx,1_fld,1_fmac,1_fmisc,1_frar_i,1_frar_m,\ + 1_frbr,1_frfr,1_frpr,1_ialu,1_icmp,1_ilog,1_ishf,1_ld,1_chk_s,\ + 1_long_i,1_rse_m,1_sem,1_stf,1_st,1_syst_m0,1_syst_m,\ + 1_tbit,1_toar_i,1_toar_m,1_tobr,1_tofr,1_topr,1_xmpy,1_xtd") + +;; ??? how to describe that if scheduled < 4 cycle then latency is 10 cycles. 
+;; (define_bypass 10 "1_mmmul,1_mmshf" "1_ialu,1_ilog,1_ishf,1_st,1_ld") + +(define_bypass 0 "1_tbit" "1_br,1_scall") + +(define_bypass 8 "1_tofr" "1_frfr,1_stf") +(define_bypass 7 "1_fmisc,1_fcvtfx,1_fmac,1_xmpy" "1_frfr") +(define_bypass 8 "1_fmisc,1_fcvtfx,1_fmac,1_xmpy" "1_stf") + +;; We don't use here fcmp because scall may be predicated. +(define_bypass 0 "1_fcvtfx,1_fld,1_fmac,1_fmisc,1_frar_i,1_frar_m,\ + 1_frbr,1_frfr,1_frpr,1_ialu,1_ialu_addr,1_ilog,1_ishf,\ + 1_ld,1_long_i,1_mmmul,1_mmshf,1_mmshfi,1_toar_m,1_tofr,\ + 1_xmpy,1_xtd" "1_scall") + +(define_bypass 0 "1_unknown,1_ignore,1_stop_bit,1_br,1_fcmp,1_fcvtfx,\ + 1_fld,1_fmac,1_fmisc,1_frar_i,1_frar_m,1_frbr,1_frfr,\ + 1_frpr,1_ialu,1_ialu_addr,1_icmp,1_ilog,1_ishf,1_ld,\ + 1_chk_s,1_long_i,1_mmmul,1_mmshf,1_mmshfi,1_nop,\ + 1_nop_b,1_nop_f,1_nop_i,1_nop_m,1_nop_x,1_rse_m,1_scall,\ + 1_sem,1_stf,1_st,1_syst_m0,1_syst_m,1_tbit,1_toar_i,\ + 1_toar_m,1_tobr,1_tofr,1_topr,1_xmpy,1_xtd,1_lfetch" + "1_ignore") + + +;; Bundling + +(define_automaton "oneb") + +;; Pseudo units for quicker searching for position in two packet window. */ +(define_query_cpu_unit "1_1,1_2,1_3,1_4,1_5,1_6" "oneb") + +;; All possible combinations of bundles/syllables +(define_cpu_unit + "1b_0m.ii, 1b_0m.mi, 1b_0m.fi, 1b_0m.mf, 1b_0b.bb, 1b_0m.bb,\ + 1b_0m.ib, 1b_0m.mb, 1b_0m.fb, 1b_0m.lx" "oneb") +(define_cpu_unit + "1b_0mi.i, 1b_0mm.i, 1b_0mf.i, 1b_0mm.f, 1b_0bb.b, 1b_0mb.b,\ + 1b_0mi.b, 1b_0mm.b, 1b_0mf.b" "oneb") +(define_query_cpu_unit + "1b_0mii., 1b_0mmi., 1b_0mfi., 1b_0mmf., 1b_0bbb., 1b_0mbb.,\ + 1b_0mib., 1b_0mmb., 1b_0mfb., 1b_0mlx." "oneb") + +(define_cpu_unit "1b_1m.ii, 1b_1m.mi, 1b_1m.fi, 1b_1b.bb, 1b_1m.bb,\ + 1b_1m.ib, 1b_1m.mb, 1b_1m.fb, 1b_1m.lx" "oneb") +(define_cpu_unit "1b_1mi.i, 1b_1mm.i, 1b_1mf.i, 1b_1bb.b, 1b_1mb.b,\ + 1b_1mi.b, 1b_1mm.b, 1b_1mf.b" "oneb") +(define_query_cpu_unit "1b_1mii., 1b_1mmi., 1b_1mfi., 1b_1bbb., 1b_1mbb.,\ + 1b_1mib., 1b_1mmb., 1b_1mfb., 1b_1mlx." 
"oneb") + +;; Slot 1 +(exclusion_set "1b_0m.ii" + "1b_0m.mi, 1b_0m.fi, 1b_0m.mf, 1b_0b.bb, 1b_0m.bb,\ + 1b_0m.ib, 1b_0m.mb, 1b_0m.fb, 1b_0m.lx") +(exclusion_set "1b_0m.mi" + "1b_0m.fi, 1b_0m.mf, 1b_0b.bb, 1b_0m.bb, 1b_0m.ib,\ + 1b_0m.mb, 1b_0m.fb, 1b_0m.lx") +(exclusion_set "1b_0m.fi" + "1b_0m.mf, 1b_0b.bb, 1b_0m.bb, 1b_0m.ib, 1b_0m.mb, 1b_0m.fb, 1b_0m.lx") +(exclusion_set "1b_0m.mf" + "1b_0b.bb, 1b_0m.bb, 1b_0m.ib, 1b_0m.mb, 1b_0m.fb, 1b_0m.lx") +(exclusion_set "1b_0b.bb" "1b_0m.bb, 1b_0m.ib, 1b_0m.mb, 1b_0m.fb, 1b_0m.lx") +(exclusion_set "1b_0m.bb" "1b_0m.ib, 1b_0m.mb, 1b_0m.fb, 1b_0m.lx") +(exclusion_set "1b_0m.ib" "1b_0m.mb, 1b_0m.fb, 1b_0m.lx") +(exclusion_set "1b_0m.mb" "1b_0m.fb, 1b_0m.lx") +(exclusion_set "1b_0m.fb" "1b_0m.lx") + +;; Slot 2 +(exclusion_set "1b_0mi.i" + "1b_0mm.i, 1b_0mf.i, 1b_0mm.f, 1b_0bb.b, 1b_0mb.b,\ + 1b_0mi.b, 1b_0mm.b, 1b_0mf.b, 1b_0mlx.") +(exclusion_set "1b_0mm.i" + "1b_0mf.i, 1b_0mm.f, 1b_0bb.b, 1b_0mb.b,\ + 1b_0mi.b, 1b_0mm.b, 1b_0mf.b, 1b_0mlx.") +(exclusion_set "1b_0mf.i" + "1b_0mm.f, 1b_0bb.b, 1b_0mb.b, 1b_0mi.b, 1b_0mm.b, 1b_0mf.b, 1b_0mlx.") +(exclusion_set "1b_0mm.f" + "1b_0bb.b, 1b_0mb.b, 1b_0mi.b, 1b_0mm.b, 1b_0mf.b, 1b_0mlx.") +(exclusion_set "1b_0bb.b" "1b_0mb.b, 1b_0mi.b, 1b_0mm.b, 1b_0mf.b, 1b_0mlx.") +(exclusion_set "1b_0mb.b" "1b_0mi.b, 1b_0mm.b, 1b_0mf.b, 1b_0mlx.") +(exclusion_set "1b_0mi.b" "1b_0mm.b, 1b_0mf.b, 1b_0mlx.") +(exclusion_set "1b_0mm.b" "1b_0mf.b, 1b_0mlx.") +(exclusion_set "1b_0mf.b" "1b_0mlx.") + +;; Slot 3 +(exclusion_set "1b_0mii." + "1b_0mmi., 1b_0mfi., 1b_0mmf., 1b_0bbb., 1b_0mbb.,\ + 1b_0mib., 1b_0mmb., 1b_0mfb., 1b_0mlx.") +(exclusion_set "1b_0mmi." + "1b_0mfi., 1b_0mmf., 1b_0bbb., 1b_0mbb.,\ + 1b_0mib., 1b_0mmb., 1b_0mfb., 1b_0mlx.") +(exclusion_set "1b_0mfi." + "1b_0mmf., 1b_0bbb., 1b_0mbb., 1b_0mib., 1b_0mmb., 1b_0mfb., 1b_0mlx.") +(exclusion_set "1b_0mmf." + "1b_0bbb., 1b_0mbb., 1b_0mib., 1b_0mmb., 1b_0mfb., 1b_0mlx.") +(exclusion_set "1b_0bbb." 
"1b_0mbb., 1b_0mib., 1b_0mmb., 1b_0mfb., 1b_0mlx.") +(exclusion_set "1b_0mbb." "1b_0mib., 1b_0mmb., 1b_0mfb., 1b_0mlx.") +(exclusion_set "1b_0mib." "1b_0mmb., 1b_0mfb., 1b_0mlx.") +(exclusion_set "1b_0mmb." "1b_0mfb., 1b_0mlx.") +(exclusion_set "1b_0mfb." "1b_0mlx.") + +;; Slot 4 +(exclusion_set "1b_1m.ii" + "1b_1m.mi, 1b_1m.fi, 1b_1b.bb, 1b_1m.bb,\ + 1b_1m.ib, 1b_1m.mb, 1b_1m.fb, 1b_1m.lx") +(exclusion_set "1b_1m.mi" + "1b_1m.fi, 1b_1b.bb, 1b_1m.bb, 1b_1m.ib, 1b_1m.mb, 1b_1m.fb, 1b_1m.lx") +(exclusion_set "1b_1m.fi" + "1b_1b.bb, 1b_1m.bb, 1b_1m.ib, 1b_1m.mb, 1b_1m.fb, 1b_1m.lx") +(exclusion_set "1b_1b.bb" "1b_1m.bb, 1b_1m.ib, 1b_1m.mb, 1b_1m.fb, 1b_1m.lx") +(exclusion_set "1b_1m.bb" "1b_1m.ib, 1b_1m.mb, 1b_1m.fb, 1b_1m.lx") +(exclusion_set "1b_1m.ib" "1b_1m.mb, 1b_1m.fb, 1b_1m.lx") +(exclusion_set "1b_1m.mb" "1b_1m.fb, 1b_1m.lx") +(exclusion_set "1b_1m.fb" "1b_1m.lx") + +;; Slot 5 +(exclusion_set "1b_1mi.i" + "1b_1mm.i, 1b_1mf.i, 1b_1bb.b, 1b_1mb.b,\ + 1b_1mi.b, 1b_1mm.b, 1b_1mf.b, 1b_1mlx.") +(exclusion_set "1b_1mm.i" + "1b_1mf.i, 1b_1bb.b, 1b_1mb.b, 1b_1mi.b, 1b_1mm.b, 1b_1mf.b, 1b_1mlx.") +(exclusion_set "1b_1mf.i" + "1b_1bb.b, 1b_1mb.b, 1b_1mi.b, 1b_1mm.b, 1b_1mf.b, 1b_1mlx.") +(exclusion_set "1b_1bb.b" "1b_1mb.b, 1b_1mi.b, 1b_1mm.b, 1b_1mf.b, 1b_1mlx.") +(exclusion_set "1b_1mb.b" "1b_1mi.b, 1b_1mm.b, 1b_1mf.b, 1b_1mlx.") +(exclusion_set "1b_1mi.b" "1b_1mm.b, 1b_1mf.b, 1b_1mlx.") +(exclusion_set "1b_1mm.b" "1b_1mf.b, 1b_1mlx.") +(exclusion_set "1b_1mf.b" "1b_1mlx.") + +;; Slot 6 +(exclusion_set "1b_1mii." + "1b_1mmi., 1b_1mfi., 1b_1bbb., 1b_1mbb.,\ + 1b_1mib., 1b_1mmb., 1b_1mfb., 1b_1mlx.") +(exclusion_set "1b_1mmi." + "1b_1mfi., 1b_1bbb., 1b_1mbb., 1b_1mib., 1b_1mmb., 1b_1mfb., 1b_1mlx.") +(exclusion_set "1b_1mfi." + "1b_1bbb., 1b_1mbb., 1b_1mib., 1b_1mmb., 1b_1mfb., 1b_1mlx.") +(exclusion_set "1b_1bbb." "1b_1mbb., 1b_1mib., 1b_1mmb., 1b_1mfb., 1b_1mlx.") +(exclusion_set "1b_1mbb." "1b_1mib., 1b_1mmb., 1b_1mfb., 1b_1mlx.") +(exclusion_set "1b_1mib." 
"1b_1mmb., 1b_1mfb., 1b_1mlx.") +(exclusion_set "1b_1mmb." "1b_1mfb., 1b_1mlx.") +(exclusion_set "1b_1mfb." "1b_1mlx.") + +(final_presence_set "1b_0mi.i" "1b_0m.ii") +(final_presence_set "1b_0mii." "1b_0mi.i") +(final_presence_set "1b_1mi.i" "1b_1m.ii") +(final_presence_set "1b_1mii." "1b_1mi.i") + +(final_presence_set "1b_0mm.i" "1b_0m.mi") +(final_presence_set "1b_0mmi." "1b_0mm.i") +(final_presence_set "1b_1mm.i" "1b_1m.mi") +(final_presence_set "1b_1mmi." "1b_1mm.i") + +(final_presence_set "1b_0mf.i" "1b_0m.fi") +(final_presence_set "1b_0mfi." "1b_0mf.i") +(final_presence_set "1b_1mf.i" "1b_1m.fi") +(final_presence_set "1b_1mfi." "1b_1mf.i") + +(final_presence_set "1b_0mm.f" "1b_0m.mf") +(final_presence_set "1b_0mmf." "1b_0mm.f") + +(final_presence_set "1b_0bb.b" "1b_0b.bb") +(final_presence_set "1b_0bbb." "1b_0bb.b") +(final_presence_set "1b_1bb.b" "1b_1b.bb") +(final_presence_set "1b_1bbb." "1b_1bb.b") + +(final_presence_set "1b_0mb.b" "1b_0m.bb") +(final_presence_set "1b_0mbb." "1b_0mb.b") +(final_presence_set "1b_1mb.b" "1b_1m.bb") +(final_presence_set "1b_1mbb." "1b_1mb.b") + +(final_presence_set "1b_0mi.b" "1b_0m.ib") +(final_presence_set "1b_0mib." "1b_0mi.b") +(final_presence_set "1b_1mi.b" "1b_1m.ib") +(final_presence_set "1b_1mib." "1b_1mi.b") + +(final_presence_set "1b_0mm.b" "1b_0m.mb") +(final_presence_set "1b_0mmb." "1b_0mm.b") +(final_presence_set "1b_1mm.b" "1b_1m.mb") +(final_presence_set "1b_1mmb." "1b_1mm.b") + +(final_presence_set "1b_0mf.b" "1b_0m.fb") +(final_presence_set "1b_0mfb." "1b_0mf.b") +(final_presence_set "1b_1mf.b" "1b_1m.fb") +(final_presence_set "1b_1mfb." "1b_1mf.b") + +(final_presence_set "1b_0mlx." "1b_0m.lx") +(final_presence_set "1b_1mlx." 
"1b_1m.lx") + +(final_presence_set + "1b_1m.ii,1b_1m.mi,1b_1m.fi,1b_1b.bb,1b_1m.bb,\ + 1b_1m.ib,1b_1m.mb,1b_1m.fb,1b_1m.lx" + "1b_0mii.,1b_0mmi.,1b_0mfi.,1b_0mmf.,1b_0bbb.,1b_0mbb.,\ + 1b_0mib.,1b_0mmb.,1b_0mfb.,1b_0mlx.") + +;; Microarchitecture units: +(define_cpu_unit + "1b_um0, 1b_um1, 1b_ui0, 1b_ui1, 1b_uf0, 1b_uf1, 1b_ub0, 1b_ub1, 1b_ub2,\ + 1b_unb0, 1b_unb1, 1b_unb2" "oneb") + +(exclusion_set "1b_ub0" "1b_unb0") +(exclusion_set "1b_ub1" "1b_unb1") +(exclusion_set "1b_ub2" "1b_unb2") + +;; The following rules are used to decrease number of alternatives. +;; They are consequences of Itanium microarchitecture. They also +;; describe the following rules mentioned in Itanium +;; microarchitecture: rules mentioned in Itanium microarchitecture: +;; o "MMF: Always splits issue before the first M and after F regardless +;; of surrounding bundles and stops". +;; o "BBB/MBB: Always splits issue after either of these bundles". +;; o "MIB BBB: Split issue after the first bundle in this pair". + +(exclusion_set "1b_0m.mf,1b_0mm.f,1b_0mmf." + "1b_1m.ii,1b_1m.mi,1b_1m.fi,1b_1b.bb,1b_1m.bb,\ + 1b_1m.ib,1b_1m.mb,1b_1m.fb,1b_1m.lx") +(exclusion_set "1b_0b.bb,1b_0bb.b,1b_0bbb.,1b_0m.bb,1b_0mb.b,1b_0mbb." + "1b_1m.ii,1b_1m.mi,1b_1m.fi,1b_1b.bb,1b_1m.bb,\ + 1b_1m.ib,1b_1m.mb,1b_1m.fb,1b_1m.lx") +(exclusion_set "1b_0m.ib,1b_0mi.b,1b_0mib." "1b_1b.bb") + +;; For exceptions of M, I, B, F insns: +(define_cpu_unit "1b_not_um1, 1b_not_ui1, 1b_not_uf1" "oneb") + +(final_absence_set "1b_not_um1" "1b_um1") +(final_absence_set "1b_not_ui1" "1b_ui1") +(final_absence_set "1b_not_uf1" "1b_uf1") + +;;; "MIB/MFB/MMB: Splits issue after any of these bundles unless the +;;; B-slot contains a nop.b or a brp instruction". +;;; "The B in an MIB/MFB/MMB bundle disperses to B0 if it is a brp or +;;; nop.b, otherwise it disperses to B2". +(final_absence_set + "1b_1m.ii, 1b_1m.mi, 1b_1m.fi, 1b_1b.bb, 1b_1m.bb,\ + 1b_1m.ib, 1b_1m.mb, 1b_1m.fb, 1b_1m.lx" + "1b_0mib. 1b_ub2, 1b_0mfb. 1b_ub2, 1b_0mmb. 
1b_ub2") + +;; This is necessary to start new processor cycle when we meet stop bit. +(define_cpu_unit "1b_stop" "oneb") +(final_absence_set + "1b_0m.ii,1b_0mi.i,1b_0mii.,1b_0m.mi,1b_0mm.i,1b_0mmi.,\ + 1b_0m.fi,1b_0mf.i,1b_0mfi.,\ + 1b_0m.mf,1b_0mm.f,1b_0mmf.,1b_0b.bb,1b_0bb.b,1b_0bbb.,\ + 1b_0m.bb,1b_0mb.b,1b_0mbb.,\ + 1b_0m.ib,1b_0mi.b,1b_0mib.,1b_0m.mb,1b_0mm.b,1b_0mmb.,\ + 1b_0m.fb,1b_0mf.b,1b_0mfb.,1b_0m.lx,1b_0mlx., \ + 1b_1m.ii,1b_1mi.i,1b_1mii.,1b_1m.mi,1b_1mm.i,1b_1mmi.,\ + 1b_1m.fi,1b_1mf.i,1b_1mfi.,\ + 1b_1b.bb,1b_1bb.b,1b_1bbb.,1b_1m.bb,1b_1mb.b,1b_1mbb.,\ + 1b_1m.ib,1b_1mi.b,1b_1mib.,\ + 1b_1m.mb,1b_1mm.b,1b_1mmb.,1b_1m.fb,1b_1mf.b,1b_1mfb.,1b_1m.lx,1b_1mlx." + "1b_stop") + +;; M and I instruction is dispersed to the lowest numbered M or I unit +;; not already in use. An I slot in the 3rd position of 2nd bundle is +;; always dispersed to I1 +(final_presence_set "1b_um1" "1b_um0") +(final_presence_set "1b_ui1" "1b_ui0, 1b_1mii., 1b_1mmi., 1b_1mfi.") + +;; Insns + +;; M and I instruction is dispersed to the lowest numbered M or I unit +;; not already in use. An I slot in the 3rd position of 2nd bundle is +;; always dispersed to I1 +(define_reservation "1b_M" + "1b_0m.ii+1_1+1b_um0|1b_0m.mi+1_1+1b_um0|1b_0mm.i+1_2+(1b_um0|1b_um1)\ + |1b_0m.fi+1_1+1b_um0|1b_0m.mf+1_1+1b_um0|1b_0mm.f+1_2+1b_um1\ + |1b_0m.bb+1_1+1b_um0|1b_0m.ib+1_1+1b_um0|1b_0m.mb+1_1+1b_um0\ + |1b_0mm.b+1_2+1b_um1|1b_0m.fb+1_1+1b_um0|1b_0m.lx+1_1+1b_um0\ + |1b_1mm.i+1_5+1b_um1|1b_1mm.b+1_5+1b_um1\ + |(1b_1m.ii+1_4|1b_1m.mi+1_4|1b_1m.fi+1_4|1b_1m.bb+1_4|1b_1m.ib+1_4\ + |1b_1m.mb+1_4|1b_1m.fb+1_4|1b_1m.lx+1_4)\ + +(1b_um0|1b_um1)") + +;; Exceptions for dispersal rules. +;; "An I slot in the 3rd position of 2nd bundle is always dispersed to I1". 
+(define_reservation "1b_I" + "1b_0mi.i+1_2+1b_ui0|1b_0mii.+1_3+(1b_ui0|1b_ui1)|1b_0mmi.+1_3+1b_ui0\ + |1b_0mfi.+1_3+1b_ui0|1b_0mi.b+1_2+1b_ui0\ + |(1b_1mi.i+1_5|1b_1mi.b+1_5)+(1b_ui0|1b_ui1)\ + |1b_1mii.+1_6+1b_ui1|1b_1mmi.+1_6+1b_ui1|1b_1mfi.+1_6+1b_ui1") + +;; "An F slot in the 1st bundle disperses to F0". +;; "An F slot in the 2st bundle disperses to F1". +(define_reservation "1b_F" + "1b_0mf.i+1_2+1b_uf0|1b_0mmf.+1_3+1b_uf0|1b_0mf.b+1_2+1b_uf0\ + |1b_1mf.i+1_5+1b_uf1|1b_1mf.b+1_5+1b_uf1") + +;;; "Each B slot in MBB or BBB bundle disperses to the corresponding B +;;; unit. That is, a B slot in 1st position is despersed to B0. In the +;;; 2nd position it is dispersed to B2". +(define_reservation "1b_NB" + "1b_0b.bb+1_1+1b_unb0|1b_0bb.b+1_2+1b_unb1|1b_0bbb.+1_3+1b_unb2\ + |1b_0mb.b+1_2+1b_unb1|1b_0mbb.+1_3+1b_unb2\ + |1b_0mib.+1_3+1b_unb0|1b_0mmb.+1_3+1b_unb0|1b_0mfb.+1_3+1b_unb0\ + |1b_1b.bb+1_4+1b_unb0|1b_1bb.b+1_5+1b_unb1\ + |1b_1bbb.+1_6+1b_unb2|1b_1mb.b+1_5+1b_unb1|1b_1mbb.+1_6+1b_unb2\ + |1b_1mib.+1_6+1b_unb0|1b_1mmb.+1_6+1b_unb0|1b_1mfb.+1_6+1b_unb0") + +(define_reservation "1b_B" + "1b_0b.bb+1_1+1b_ub0|1b_0bb.b+1_2+1b_ub1|1b_0bbb.+1_3+1b_ub2\ + |1b_0mb.b+1_2+1b_ub1|1b_0mbb.+1_3+1b_ub2|1b_0mib.+1_3+1b_ub2\ + |1b_0mfb.+1_3+1b_ub2|1b_1b.bb+1_4+1b_ub0|1b_1bb.b+1_5+1b_ub1\ + |1b_1bbb.+1_6+1b_ub2|1b_1mb.b+1_5+1b_ub1\ + |1b_1mib.+1_6+1b_ub2|1b_1mmb.+1_6+1b_ub2|1b_1mfb.+1_6+1b_ub2") + +(define_reservation "1b_L" "1b_0mlx.+1_3+1b_ui0+1b_uf0\ + |1b_1mlx.+1_6+(1b_ui0|1b_ui1)+1b_uf1") + +;; We assume that there is no insn issued on the same cycle as unknown insn. 
+(define_cpu_unit "1b_empty" "oneb") +(exclusion_set "1b_empty" + "1b_0m.ii,1b_0m.mi,1b_0m.fi,1b_0m.mf,1b_0b.bb,1b_0m.bb,\ + 1b_0m.ib,1b_0m.mb,1b_0m.fb,1b_0m.lx") + +;; Units used by the 1b_pre_cycle reservation to carry a partly filled +;; bundle over a cycle boundary: 1b_NX_bs is reserved on one cycle and +;; the matching 1b_X_cont on the next one. The final_presence_set and +;; exclusion_set pairs below tie each 1b_NX_bs to bundle N having the +;; slots X filled and the following slot still free. +(define_cpu_unit + "1b_0m_bs, 1b_0mi_bs, 1b_0mm_bs, 1b_0mf_bs, 1b_0b_bs, 1b_0bb_bs, 1b_0mb_bs" + "oneb") +(define_cpu_unit + "1b_1m_bs, 1b_1mi_bs, 1b_1mm_bs, 1b_1mf_bs, 1b_1b_bs, 1b_1bb_bs, 1b_1mb_bs" + "oneb") + +(define_cpu_unit "1b_m_cont, 1b_mi_cont, 1b_mm_cont, 1b_mf_cont, 1b_mb_cont,\ + 1b_b_cont, 1b_bb_cont" "oneb") + +;; For stop in the middle of the bundles. +(define_cpu_unit "1b_m_stop, 1b_m0_stop, 1b_m1_stop, 1b_0mmi_cont" "oneb") +(define_cpu_unit "1b_mi_stop, 1b_mi0_stop, 1b_mi1_stop, 1b_0mii_cont" "oneb") + +(final_presence_set "1b_0m_bs" + "1b_0m.ii, 1b_0m.mi, 1b_0m.mf, 1b_0m.fi, 1b_0m.bb,\ + 1b_0m.ib, 1b_0m.fb, 1b_0m.mb, 1b_0m.lx") +(final_presence_set "1b_1m_bs" + "1b_1m.ii, 1b_1m.mi, 1b_1m.fi, 1b_1m.bb, 1b_1m.ib, 1b_1m.fb, 1b_1m.mb,\ + 1b_1m.lx") +;; The second slot after "mi" is either 1b_Nmi.i (MII) or 1b_Nmi.b (MIB), +;; matching the 1b_Nmii./1b_Nmib. exclusions for the same units below. +(final_presence_set "1b_0mi_bs" "1b_0mi.i, 1b_0mi.b") +(final_presence_set "1b_1mi_bs" "1b_1mi.i, 1b_1mi.b") +(final_presence_set "1b_0mm_bs" "1b_0mm.i, 1b_0mm.f, 1b_0mm.b") +(final_presence_set "1b_1mm_bs" "1b_1mm.i, 1b_1mm.b") +(final_presence_set "1b_0mf_bs" "1b_0mf.i, 1b_0mf.b") +(final_presence_set "1b_1mf_bs" "1b_1mf.i, 1b_1mf.b") +(final_presence_set "1b_0b_bs" "1b_0b.bb") +(final_presence_set "1b_1b_bs" "1b_1b.bb") +(final_presence_set "1b_0bb_bs" "1b_0bb.b") +(final_presence_set "1b_1bb_bs" "1b_1bb.b") +(final_presence_set "1b_0mb_bs" "1b_0mb.b") +(final_presence_set "1b_1mb_bs" "1b_1mb.b") + +(exclusion_set "1b_0m_bs" + "1b_0mi.i, 1b_0mm.i, 1b_0mm.f, 1b_0mf.i, 1b_0mb.b,\ + 1b_0mi.b, 1b_0mf.b, 1b_0mm.b, 1b_0mlx., 1b_m0_stop") +(exclusion_set "1b_1m_bs" + "1b_1mi.i, 1b_1mm.i, 1b_1mf.i, 1b_1mb.b, 1b_1mi.b, 1b_1mf.b, 1b_1mm.b,\ + 1b_1mlx., 1b_m1_stop") +(exclusion_set "1b_0mi_bs" "1b_0mii., 1b_0mib., 1b_mi0_stop") +(exclusion_set "1b_1mi_bs" "1b_1mii., 1b_1mib., 1b_mi1_stop") +(exclusion_set "1b_0mm_bs" "1b_0mmi., 1b_0mmf., 1b_0mmb.") 
+(exclusion_set "1b_1mm_bs" "1b_1mmi., 1b_1mmb.") +(exclusion_set "1b_0mf_bs" "1b_0mfi., 1b_0mfb.") +(exclusion_set "1b_1mf_bs" "1b_1mfi., 1b_1mfb.") +(exclusion_set "1b_0b_bs" "1b_0bb.b") +(exclusion_set "1b_1b_bs" "1b_1bb.b") +(exclusion_set "1b_0bb_bs" "1b_0bbb.") +(exclusion_set "1b_1bb_bs" "1b_1bbb.") +(exclusion_set "1b_0mb_bs" "1b_0mbb.") +(exclusion_set "1b_1mb_bs" "1b_1mbb.") + +(exclusion_set + "1b_0m_bs, 1b_0mi_bs, 1b_0mm_bs, 1b_0mf_bs, 1b_0b_bs, 1b_0bb_bs, 1b_0mb_bs, + 1b_1m_bs, 1b_1mi_bs, 1b_1mm_bs, 1b_1mf_bs, 1b_1b_bs, 1b_1bb_bs, 1b_1mb_bs" + "1b_stop") + +(final_presence_set + "1b_0mi.i, 1b_0mm.i, 1b_0mf.i, 1b_0mm.f, 1b_0mb.b,\ + 1b_0mi.b, 1b_0mm.b, 1b_0mf.b, 1b_0mlx." + "1b_m_cont") +(final_presence_set "1b_0mii., 1b_0mib." "1b_mi_cont") +(final_presence_set "1b_0mmi., 1b_0mmf., 1b_0mmb." "1b_mm_cont") +(final_presence_set "1b_0mfi., 1b_0mfb." "1b_mf_cont") +(final_presence_set "1b_0bb.b" "1b_b_cont") +(final_presence_set "1b_0bbb." "1b_bb_cont") +(final_presence_set "1b_0mbb." "1b_mb_cont") + +(exclusion_set + "1b_0m.ii, 1b_0m.mi, 1b_0m.fi, 1b_0m.mf, 1b_0b.bb, 1b_0m.bb,\ + 1b_0m.ib, 1b_0m.mb, 1b_0m.fb, 1b_0m.lx" + "1b_m_cont, 1b_mi_cont, 1b_mm_cont, 1b_mf_cont,\ + 1b_mb_cont, 1b_b_cont, 1b_bb_cont") + +(exclusion_set "1b_empty" + "1b_m_cont,1b_mi_cont,1b_mm_cont,1b_mf_cont,\ + 1b_mb_cont,1b_b_cont,1b_bb_cont") + +;; For m;mi bundle +(final_presence_set "1b_m0_stop" "1b_0m.mi") +(final_presence_set "1b_0mm.i" "1b_0mmi_cont") +(exclusion_set "1b_0mmi_cont" + "1b_0m.ii, 1b_0m.mi, 1b_0m.fi, 1b_0m.mf, 1b_0b.bb, 1b_0m.bb,\ + 1b_0m.ib, 1b_0m.mb, 1b_0m.fb, 1b_0m.lx") +(exclusion_set "1b_m0_stop" "1b_0mm.i") +(final_presence_set "1b_m1_stop" "1b_1m.mi") +(exclusion_set "1b_m1_stop" "1b_1mm.i") +(final_presence_set "1b_m_stop" "1b_m0_stop, 1b_m1_stop") + +;; For mi;i bundle +(final_presence_set "1b_mi0_stop" "1b_0mi.i") +(final_presence_set "1b_0mii." 
"1b_0mii_cont") +(exclusion_set "1b_0mii_cont" + "1b_0m.ii, 1b_0m.mi, 1b_0m.fi, 1b_0m.mf, 1b_0b.bb, 1b_0m.bb,\ + 1b_0m.ib, 1b_0m.mb, 1b_0m.fb, 1b_0m.lx") +(exclusion_set "1b_mi0_stop" "1b_0mii.") +(final_presence_set "1b_mi1_stop" "1b_1mi.i") +(exclusion_set "1b_mi1_stop" "1b_1mii.") +(final_presence_set "1b_mi_stop" "1b_mi0_stop, 1b_mi1_stop") + +(final_absence_set + "1b_0m.ii,1b_0mi.i,1b_0mii.,1b_0m.mi,1b_0mm.i,1b_0mmi.,\ + 1b_0m.fi,1b_0mf.i,1b_0mfi.,1b_0m.mf,1b_0mm.f,1b_0mmf.,\ + 1b_0b.bb,1b_0bb.b,1b_0bbb.,1b_0m.bb,1b_0mb.b,1b_0mbb.,\ + 1b_0m.ib,1b_0mi.b,1b_0mib.,1b_0m.mb,1b_0mm.b,1b_0mmb.,\ + 1b_0m.fb,1b_0mf.b,1b_0mfb.,1b_0m.lx,1b_0mlx., \ + 1b_1m.ii,1b_1mi.i,1b_1mii.,1b_1m.mi,1b_1mm.i,1b_1mmi.,\ + 1b_1m.fi,1b_1mf.i,1b_1mfi.,\ + 1b_1b.bb,1b_1bb.b,1b_1bbb.,1b_1m.bb,1b_1mb.b,1b_1mbb.,\ + 1b_1m.ib,1b_1mi.b,1b_1mib.,1b_1m.mb,1b_1mm.b,1b_1mmb.,\ + 1b_1m.fb,1b_1mf.b,1b_1mfb.,1b_1m.lx,1b_1mlx." + "1b_m0_stop,1b_m1_stop,1b_mi0_stop,1b_mi1_stop") + +(define_reservation "1b_A" "1b_M|1b_I") + +(define_insn_reservation "1b_stop_bit" 0 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "stop_bit")) + (ne (symbol_ref "bundling_p") (const_int 0))) + "1b_stop|1b_m0_stop|1b_m1_stop|1b_mi0_stop|1b_mi1_stop") +(define_insn_reservation "1b_br" 0 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "br")) + (ne (symbol_ref "bundling_p") (const_int 0))) "1b_B") +(define_insn_reservation "1b_scall" 0 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "scall")) + (ne (symbol_ref "bundling_p") (const_int 0))) "1b_B") +(define_insn_reservation "1b_fcmp" 2 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "fcmp")) + (ne (symbol_ref "bundling_p") (const_int 0))) + "1b_F+1b_not_uf1") +(define_insn_reservation "1b_fcvtfx" 7 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "fcvtfx")) + (ne (symbol_ref "bundling_p") (const_int 0))) "1b_F") +(define_insn_reservation "1b_fld" 9 + (and (and (eq_attr "cpu" "itanium") + 
(eq_attr "itanium_class" "fld")) + (ne (symbol_ref "bundling_p") (const_int 0))) "1b_M") +(define_insn_reservation "1b_fmac" 5 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "fmac")) + (ne (symbol_ref "bundling_p") (const_int 0))) "1b_F") +(define_insn_reservation "1b_fmisc" 5 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "fmisc")) + (ne (symbol_ref "bundling_p") (const_int 0))) + "1b_F+1b_not_uf1") +(define_insn_reservation "1b_frar_i" 13 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "frar_i")) + (ne (symbol_ref "bundling_p") (const_int 0))) + "1b_I+1b_not_ui1") +(define_insn_reservation "1b_frar_m" 6 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "frar_m")) + (ne (symbol_ref "bundling_p") (const_int 0))) + "1b_M+1b_not_um1") +(define_insn_reservation "1b_frbr" 2 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "frbr")) + (ne (symbol_ref "bundling_p") (const_int 0))) + "1b_I+1b_not_ui1") +(define_insn_reservation "1b_frfr" 2 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "frfr")) + (ne (symbol_ref "bundling_p") (const_int 0))) + "1b_M+1b_not_um1") +(define_insn_reservation "1b_frpr" 2 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "frpr")) + (ne (symbol_ref "bundling_p") (const_int 0))) + "1b_I+1b_not_ui1") +(define_insn_reservation "1b_ialu" 1 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "ialu")) + (ne (symbol_ref + "bundling_p && !ia64_produce_address_p (insn)") + (const_int 0))) + "1b_A") +(define_insn_reservation "1b_ialu_addr" 1 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "ialu")) + (eq (symbol_ref + "bundling_p && ia64_produce_address_p (insn)") + (const_int 1))) + "1b_M") +(define_insn_reservation "1b_icmp" 1 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "icmp")) + (ne (symbol_ref "bundling_p") (const_int 0))) "1b_A") +(define_insn_reservation "1b_ilog" 1 + (and (and 
(eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "ilog")) + (ne (symbol_ref "bundling_p") (const_int 0))) "1b_A") +(define_insn_reservation "1b_ishf" 1 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "ishf")) + (ne (symbol_ref "bundling_p") (const_int 0))) + "1b_I+1b_not_ui1") +(define_insn_reservation "1b_ld" 2 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "ld")) + (ne (symbol_ref "bundling_p") (const_int 0))) "1b_M") +(define_insn_reservation "1b_long_i" 1 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "long_i")) + (ne (symbol_ref "bundling_p") (const_int 0))) "1b_L") +(define_insn_reservation "1b_mmmul" 2 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "mmmul")) + (ne (symbol_ref "bundling_p") (const_int 0))) + "1b_I+1b_not_ui1") +(define_insn_reservation "1b_mmshf" 2 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "mmshf")) + (ne (symbol_ref "bundling_p") (const_int 0))) "1b_I") +(define_insn_reservation "1b_mmshfi" 2 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "mmshfi")) + (ne (symbol_ref "bundling_p") (const_int 0))) "1b_I") +(define_insn_reservation "1b_rse_m" 0 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "rse_m")) + (ne (symbol_ref "bundling_p") (const_int 0))) + "(1b_0m.ii|1b_0m.mi|1b_0m.fi|1b_0m.mf|1b_0b.bb|1b_0m.bb\ + |1b_0m.ib|1b_0m.mb|1b_0m.fb|1b_0m.lx)+1_1+1b_um0") +(define_insn_reservation "1b_sem" 0 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "sem")) + (ne (symbol_ref "bundling_p") (const_int 0))) + "1b_M+1b_not_um1") +(define_insn_reservation "1b_stf" 1 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "stf")) + (ne (symbol_ref "bundling_p") (const_int 0))) "1b_M") +(define_insn_reservation "1b_st" 1 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "st")) + (ne (symbol_ref "bundling_p") (const_int 0))) "1b_M") +(define_insn_reservation "1b_syst_m0" 0 + (and (and 
(eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "syst_m0")) + (ne (symbol_ref "bundling_p") (const_int 0))) + "1b_M+1b_not_um1") +(define_insn_reservation "1b_syst_m" 0 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "syst_m")) + (ne (symbol_ref "bundling_p") (const_int 0))) "1b_M") +(define_insn_reservation "1b_tbit" 1 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "tbit")) + (ne (symbol_ref "bundling_p") (const_int 0))) + "1b_I+1b_not_ui1") +(define_insn_reservation "1b_toar_i" 0 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "toar_i")) + (ne (symbol_ref "bundling_p") (const_int 0))) + "1b_I+1b_not_ui1") +(define_insn_reservation "1b_toar_m" 5 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "toar_m")) + (ne (symbol_ref "bundling_p") (const_int 0))) + "1b_M+1b_not_um1") +(define_insn_reservation "1b_tobr" 1 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "tobr")) + (ne (symbol_ref "bundling_p") (const_int 0))) + "1b_I+1b_not_ui1") +(define_insn_reservation "1b_tofr" 9 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "tofr")) + (ne (symbol_ref "bundling_p") (const_int 0))) "1b_M") +(define_insn_reservation "1b_topr" 1 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "topr")) + (ne (symbol_ref "bundling_p") (const_int 0))) + "1b_I+1b_not_ui1") +(define_insn_reservation "1b_xmpy" 7 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "xmpy")) + (ne (symbol_ref "bundling_p") (const_int 0))) "1b_F") +(define_insn_reservation "1b_xtd" 1 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "xtd")) + (ne (symbol_ref "bundling_p") (const_int 0))) "1b_I") +(define_insn_reservation "1b_chk_s" 0 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "chk_s")) + (ne (symbol_ref "bundling_p") (const_int 0))) "1b_A") +(define_insn_reservation "1b_lfetch" 0 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" 
"lfetch")) + (ne (symbol_ref "bundling_p") (const_int 0))) "1b_M") +(define_insn_reservation "1b_nop_m" 0 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "nop_m")) + (ne (symbol_ref "bundling_p") (const_int 0))) "1b_M") +(define_insn_reservation "1b_nop_b" 0 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "nop_b")) + (ne (symbol_ref "bundling_p") (const_int 0))) "1b_NB") +(define_insn_reservation "1b_nop_i" 0 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "nop_i")) + (ne (symbol_ref "bundling_p") (const_int 0))) "1b_I") +(define_insn_reservation "1b_nop_f" 0 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "nop_f")) + (ne (symbol_ref "bundling_p") (const_int 0))) "1b_F") +(define_insn_reservation "1b_nop_x" 0 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "nop_x")) + (ne (symbol_ref "bundling_p") (const_int 0))) "1b_L") +(define_insn_reservation "1b_unknown" 1 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "unknown")) + (ne (symbol_ref "bundling_p") (const_int 0))) + "1b_empty") +(define_insn_reservation "1b_nop" 1 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "nop")) + (ne (symbol_ref "bundling_p") (const_int 0))) + "1b_M|1b_NB|1b_I|1b_F") +(define_insn_reservation "1b_ignore" 0 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "ignore")) + (ne (symbol_ref "bundling_p") (const_int 0))) + "nothing") + +(define_insn_reservation "1b_pre_cycle" 0 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "pre_cycle")) + (ne (symbol_ref "bundling_p") (const_int 0))) + "(1b_0m_bs, 1b_m_cont) \ + | (1b_0mi_bs, 1b_mi_cont) \ + | (1b_0mm_bs, 1b_mm_cont) \ + | (1b_0mf_bs, 1b_mf_cont) \ + | (1b_0b_bs, 1b_b_cont) \ + | (1b_0bb_bs, 1b_bb_cont) \ + | (1b_0mb_bs, 1b_mb_cont) \ + | (1b_1m_bs, 1b_m_cont) \ + | (1b_1mi_bs, 1b_mi_cont) \ + | (1b_1mm_bs, 1b_mm_cont) \ + | (1b_1mf_bs, 1b_mf_cont) \ + | (1b_1b_bs, 1b_b_cont) \ + | (1b_1bb_bs, 
1b_bb_cont) \ + | (1b_1mb_bs, 1b_mb_cont) \ + | (1b_m_stop, 1b_0mmi_cont) \ + | (1b_mi_stop, 1b_0mii_cont)") + diff --git a/gcc/config/ia64/itanium2.md b/gcc/config/ia64/itanium2.md new file mode 100644 index 00000000000..73b533ea70c --- /dev/null +++ b/gcc/config/ia64/itanium2.md @@ -0,0 +1,1762 @@ +;; Itanium2 DFA descriptions for insn scheduling and bundling. +;; Copyright (C) 2002 Free Software Foundation, Inc. +;; Contributed by Vladimir Makarov <vmakarov@redhat.com>. +;; +;; This file is part of GNU CC. +;; +;; GNU CC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 2, or (at your option) +;; any later version. +;; +;; GNU CC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GNU CC; see the file COPYING. If not, write to +;; the Free Software Foundation, 59 Temple Place - Suite 330, +;; Boston, MA 02111-1307, USA. */ +;; + +/* This is description of pipeline hazards based on DFA. The + following constructions can be used for this: + + o define_cpu_unit string [string]) describes a cpu functional unit + (separated by comma). + + 1st operand: Names of cpu function units. + 2nd operand: Name of automaton (see comments for + DEFINE_AUTOMATON). + + All define_reservations and define_cpu_units should have unique + names which can not be "nothing". + + o (exclusion_set string string) means that each CPU function unit + in the first string can not be reserved simultaneously with each + unit whose name is in the second string and vise versa. CPU + units in the string are separated by commas. 
For example, it is + useful for describing a CPU with a fully pipelined floating point + functional unit which can execute simultaneously only single + floating point insns or only double floating point insns. + + o (presence_set string string) means that each CPU function unit in + the first string can not be reserved unless at least one of the + patterns of units whose names are in the second string is + reserved. This is an asymmetric relation. CPU units or unit + patterns in the strings are separated by commas. Pattern is one + unit name or unit names separated by white-spaces. + + For example, it is useful for describing that slot1 is reserved + after slot0 reservation for a VLIW processor. We could describe + it by the following construction + + (presence_set "slot1" "slot0") + + Or slot1 is reserved only after slot0 and unit b0 reservation. + In this case we could write + + (presence_set "slot1" "slot0 b0") + + All CPU functional units in a set should belong to the same + automaton. + + o (final_presence_set string string) is analogous to + `presence_set'. The difference between them is when checking is + done. When an instruction is issued in given automaton state + reflecting all current and planned unit reservations, the + automaton state is changed. The first state is a source state, + the second one is a result state. Checking for `presence_set' is + done on the source state reservation, checking for + `final_presence_set' is done on the result reservation. This + construction is useful to describe a reservation which is + actually two subsequent reservations. For example, if we use + + (presence_set "slot1" "slot0") + + the following insn will never be issued (because slot1 requires + slot0 which is absent in the source state). + + (define_reservation "insn_and_nop" "slot0 + slot1") + + but it can be issued if we use the analogous `final_presence_set'. 
+ + o (absence_set string string) means that each CPU function unit in + the first string can be reserved only if each pattern of units + whose names are in the second string is not reserved. This is an + asymmetric relation (actually exclusion set is analogous to this + one but it is symmetric). CPU units or unit patterns in the + string are separated by commas. Pattern is one unit name or unit + names separated by white-spaces. + + For example, it is useful for describing that slot2 can not be + reserved after slot0 or slot1 reservation for a VLIW processor. + We could describe it by the following construction + + (absence_set "slot2" "slot0, slot1") + + Or slot2 can not be reserved if slot0 and unit b0 are reserved or + slot1 and unit b1 are reserved. In this case we could write + + (absence_set "slot2" "slot0 b0, slot1 b1") + + All CPU functional units in a set should belong to the same + automaton. + + o (final_absence_set string string) is analogous to `absence_set' but + checking is done on the result (state) reservation. See comments + for final_presence_set. + + o (define_bypass number out_insn_names in_insn_names) names bypass with + given latency (the first number) from insns given by the first + string (see define_insn_reservation) into insns given by the + second string. Insn names in the strings are separated by + commas. + + o (define_automaton string) describes names of an automaton + generated and used for pipeline hazards recognition. The names + are separated by commas. Actually it is possible to generate a + single automaton but unfortunately it can be very large. If we + use more than one automaton, the total size of the automata is + usually less than that of the single one. The automaton name is used in + define_cpu_unit. All automata should have unique names. + + o (automata_option string) describes option for generation of + automata. Currently there are the following options: + + o "no-minimization" which makes no minimization of automata. 
+ This is only worth doing when we are debugging the description + and need to look more accurately at reservations of states. + + o "ndfa" which makes automata with nondeterministic reservation + by insns. + + o (define_reservation string string) names reservation (the first + string) of cpu functional units (the 2nd string). Sometimes unit + reservations for different insns contain common parts. In such + a case, you describe the common part and use its name (the 1st + parameter) in regular expression in define_insn_reservation. All + define_reservations, define results and define_cpu_units should + have unique names which can not be "nothing". + + o (define_insn_reservation name default_latency condition regexpr) + describes reservation of cpu functional units (the 3rd operand) + for instruction which is selected by the condition (the 2nd + parameter). The first parameter is used for output of debugging + information. The reservations are described by a regular + expression according to the following syntax: + + regexp = regexp "," oneof + | oneof + + oneof = oneof "|" allof + | allof + + allof = allof "+" repeat + | repeat + + repeat = element "*" number + | element + + element = cpu_function_name + | reservation_name + | result_name + | "nothing" + | "(" regexp ")" + + 1. "," is used for describing start of the next cycle in + reservation. + + 2. "|" is used for describing the reservation described by the + first regular expression *or* the reservation described by + the second regular expression *or* etc. + + 3. "+" is used for describing the reservation described by the + first regular expression *and* the reservation described by + the second regular expression *and* etc. + + 4. "*" is used for convenience and simply means sequence in + which the regular expression is repeated NUMBER times with + cycle advancing (see ","). + + 5. cpu function unit name which means reservation. + + 6. reservation name -- see define_reservation. + + 7. 
string "nothing" means no units reservation. + +*/ + +(define_automaton "two") + +;; All possible combinations of bundles/syllables +(define_cpu_unit "2_0m.ii, 2_0m.mi, 2_0m.fi, 2_0m.mf, 2_0b.bb, 2_0m.bb,\ + 2_0m.ib, 2_0m.mb, 2_0m.fb, 2_0m.lx" "two") +(define_cpu_unit "2_0mi.i, 2_0mm.i, 2_0mf.i, 2_0mm.f, 2_0bb.b, 2_0mb.b,\ + 2_0mi.b, 2_0mm.b, 2_0mf.b, 2_0mlx." "two") +(define_cpu_unit "2_0mii., 2_0mmi., 2_0mfi., 2_0mmf., 2_0bbb., 2_0mbb.,\ + 2_0mib., 2_0mmb., 2_0mfb." "two") + +(define_cpu_unit "2_1m.ii, 2_1m.mi, 2_1m.fi, 2_1m.mf, 2_1b.bb, 2_1m.bb,\ + 2_1m.ib, 2_1m.mb, 2_1m.fb, 2_1m.lx" "two") +(define_cpu_unit "2_1mi.i, 2_1mm.i, 2_1mf.i, 2_1mm.f, 2_1bb.b, 2_1mb.b,\ + 2_1mi.b, 2_1mm.b, 2_1mf.b, 2_1mlx." "two") +(define_cpu_unit "2_1mii., 2_1mmi., 2_1mfi., 2_1mmf., 2_1bbb., 2_1mbb.,\ + 2_1mib., 2_1mmb., 2_1mfb." "two") + +;; Slot 1 +(exclusion_set "2_0m.ii" "2_0m.mi, 2_0m.fi, 2_0m.mf, 2_0b.bb, 2_0m.bb,\ + 2_0m.ib, 2_0m.mb, 2_0m.fb, 2_0m.lx") +(exclusion_set "2_0m.mi" "2_0m.fi, 2_0m.mf, 2_0b.bb, 2_0m.bb, 2_0m.ib,\ + 2_0m.mb, 2_0m.fb, 2_0m.lx") +(exclusion_set "2_0m.fi" "2_0m.mf, 2_0b.bb, 2_0m.bb, 2_0m.ib, 2_0m.mb,\ + 2_0m.fb, 2_0m.lx") +(exclusion_set "2_0m.mf" "2_0b.bb, 2_0m.bb, 2_0m.ib, 2_0m.mb, 2_0m.fb,\ + 2_0m.lx") +(exclusion_set "2_0b.bb" "2_0m.bb, 2_0m.ib, 2_0m.mb, 2_0m.fb, 2_0m.lx") +(exclusion_set "2_0m.bb" "2_0m.ib, 2_0m.mb, 2_0m.fb, 2_0m.lx") +(exclusion_set "2_0m.ib" "2_0m.mb, 2_0m.fb, 2_0m.lx") +(exclusion_set "2_0m.mb" "2_0m.fb, 2_0m.lx") +(exclusion_set "2_0m.fb" "2_0m.lx") + +;; Slot 2 +(exclusion_set "2_0mi.i" "2_0mm.i, 2_0mf.i, 2_0mm.f, 2_0bb.b, 2_0mb.b,\ + 2_0mi.b, 2_0mm.b, 2_0mf.b, 2_0mlx.") +(exclusion_set "2_0mm.i" "2_0mf.i, 2_0mm.f, 2_0bb.b, 2_0mb.b,\ + 2_0mi.b, 2_0mm.b, 2_0mf.b, 2_0mlx.") +(exclusion_set "2_0mf.i" "2_0mm.f, 2_0bb.b, 2_0mb.b, 2_0mi.b, 2_0mm.b,\ + 2_0mf.b, 2_0mlx.") +(exclusion_set "2_0mm.f" "2_0bb.b, 2_0mb.b, 2_0mi.b, 2_0mm.b, 2_0mf.b,\ + 2_0mlx.") +(exclusion_set "2_0bb.b" "2_0mb.b, 2_0mi.b, 2_0mm.b, 2_0mf.b, 2_0mlx.") 
+(exclusion_set "2_0mb.b" "2_0mi.b, 2_0mm.b, 2_0mf.b, 2_0mlx.") +(exclusion_set "2_0mi.b" "2_0mm.b, 2_0mf.b, 2_0mlx.") +(exclusion_set "2_0mm.b" "2_0mf.b, 2_0mlx.") +(exclusion_set "2_0mf.b" "2_0mlx.") + +;; Slot 3 +(exclusion_set "2_0mii." "2_0mmi., 2_0mfi., 2_0mmf., 2_0bbb., 2_0mbb.,\ + 2_0mib., 2_0mmb., 2_0mfb., 2_0mlx.") +(exclusion_set "2_0mmi." "2_0mfi., 2_0mmf., 2_0bbb., 2_0mbb.,\ + 2_0mib., 2_0mmb., 2_0mfb., 2_0mlx.") +(exclusion_set "2_0mfi." "2_0mmf., 2_0bbb., 2_0mbb., 2_0mib., 2_0mmb.,\ + 2_0mfb., 2_0mlx.") +(exclusion_set "2_0mmf." "2_0bbb., 2_0mbb., 2_0mib., 2_0mmb., 2_0mfb.,\ + 2_0mlx.") +(exclusion_set "2_0bbb." "2_0mbb., 2_0mib., 2_0mmb., 2_0mfb., 2_0mlx.") +(exclusion_set "2_0mbb." "2_0mib., 2_0mmb., 2_0mfb., 2_0mlx.") +(exclusion_set "2_0mib." "2_0mmb., 2_0mfb., 2_0mlx.") +(exclusion_set "2_0mmb." "2_0mfb., 2_0mlx.") +(exclusion_set "2_0mfb." "2_0mlx.") + +;; Slot 4 +(exclusion_set "2_1m.ii" "2_1m.mi, 2_1m.fi, 2_1m.mf, 2_1b.bb, 2_1m.bb,\ + 2_1m.ib, 2_1m.mb, 2_1m.fb, 2_1m.lx") +(exclusion_set "2_1m.mi" "2_1m.fi, 2_1m.mf, 2_1b.bb, 2_1m.bb, 2_1m.ib,\ + 2_1m.mb, 2_1m.fb, 2_1m.lx") +(exclusion_set "2_1m.fi" "2_1m.mf, 2_1b.bb, 2_1m.bb, 2_1m.ib, 2_1m.mb,\ + 2_1m.fb, 2_1m.lx") +(exclusion_set "2_1m.mf" "2_1b.bb, 2_1m.bb, 2_1m.ib, 2_1m.mb, 2_1m.fb,\ + 2_1m.lx") +(exclusion_set "2_1b.bb" "2_1m.bb, 2_1m.ib, 2_1m.mb, 2_1m.fb, 2_1m.lx") +(exclusion_set "2_1m.bb" "2_1m.ib, 2_1m.mb, 2_1m.fb, 2_1m.lx") +(exclusion_set "2_1m.ib" "2_1m.mb, 2_1m.fb, 2_1m.lx") +(exclusion_set "2_1m.mb" "2_1m.fb, 2_1m.lx") +(exclusion_set "2_1m.fb" "2_1m.lx") + +;; Slot 5 +(exclusion_set "2_1mi.i" "2_1mm.i, 2_1mf.i, 2_1mm.f, 2_1bb.b, 2_1mb.b,\ + 2_1mi.b, 2_1mm.b, 2_1mf.b, 2_1mlx.") +(exclusion_set "2_1mm.i" "2_1mf.i, 2_1mm.f, 2_1bb.b, 2_1mb.b,\ + 2_1mi.b, 2_1mm.b, 2_1mf.b, 2_1mlx.") +(exclusion_set "2_1mf.i" "2_1mm.f, 2_1bb.b, 2_1mb.b, 2_1mi.b, 2_1mm.b,\ + 2_1mf.b, 2_1mlx.") +(exclusion_set "2_1mm.f" "2_1bb.b, 2_1mb.b, 2_1mi.b, 2_1mm.b, 2_1mf.b,\ + 2_1mlx.") +(exclusion_set "2_1bb.b" 
"2_1mb.b, 2_1mi.b, 2_1mm.b, 2_1mf.b, 2_1mlx.") +(exclusion_set "2_1mb.b" "2_1mi.b, 2_1mm.b, 2_1mf.b, 2_1mlx.") +(exclusion_set "2_1mi.b" "2_1mm.b, 2_1mf.b, 2_1mlx.") +(exclusion_set "2_1mm.b" "2_1mf.b, 2_1mlx.") +(exclusion_set "2_1mf.b" "2_1mlx.") + +;; Slot 6 +(exclusion_set "2_1mii." "2_1mmi., 2_1mfi., 2_1mmf., 2_1bbb., 2_1mbb.,\ + 2_1mib., 2_1mmb., 2_1mfb., 2_1mlx.") +(exclusion_set "2_1mmi." "2_1mfi., 2_1mmf., 2_1bbb., 2_1mbb.,\ + 2_1mib., 2_1mmb., 2_1mfb., 2_1mlx.") +(exclusion_set "2_1mfi." "2_1mmf., 2_1bbb., 2_1mbb., 2_1mib., 2_1mmb.,\ + 2_1mfb., 2_1mlx.") +(exclusion_set "2_1mmf." "2_1bbb., 2_1mbb., 2_1mib., 2_1mmb., 2_1mfb.,\ + 2_1mlx.") +(exclusion_set "2_1bbb." "2_1mbb., 2_1mib., 2_1mmb., 2_1mfb., 2_1mlx.") +(exclusion_set "2_1mbb." "2_1mib., 2_1mmb., 2_1mfb., 2_1mlx.") +(exclusion_set "2_1mib." "2_1mmb., 2_1mfb., 2_1mlx.") +(exclusion_set "2_1mmb." "2_1mfb., 2_1mlx.") +(exclusion_set "2_1mfb." "2_1mlx.") + +(final_presence_set "2_0mi.i" "2_0m.ii") +(final_presence_set "2_0mii." "2_0mi.i") +(final_presence_set "2_1mi.i" "2_1m.ii") +(final_presence_set "2_1mii." "2_1mi.i") + +(final_presence_set "2_0mm.i" "2_0m.mi") +(final_presence_set "2_0mmi." "2_0mm.i") +(final_presence_set "2_1mm.i" "2_1m.mi") +(final_presence_set "2_1mmi." "2_1mm.i") + +(final_presence_set "2_0mf.i" "2_0m.fi") +(final_presence_set "2_0mfi." "2_0mf.i") +(final_presence_set "2_1mf.i" "2_1m.fi") +(final_presence_set "2_1mfi." "2_1mf.i") + +(final_presence_set "2_0mm.f" "2_0m.mf") +(final_presence_set "2_0mmf." "2_0mm.f") +(final_presence_set "2_1mm.f" "2_1m.mf") +(final_presence_set "2_1mmf." "2_1mm.f") + +(final_presence_set "2_0bb.b" "2_0b.bb") +(final_presence_set "2_0bbb." "2_0bb.b") +(final_presence_set "2_1bb.b" "2_1b.bb") +(final_presence_set "2_1bbb." "2_1bb.b") + +(final_presence_set "2_0mb.b" "2_0m.bb") +(final_presence_set "2_0mbb." "2_0mb.b") +(final_presence_set "2_1mb.b" "2_1m.bb") +(final_presence_set "2_1mbb." 
"2_1mb.b") + +(final_presence_set "2_0mi.b" "2_0m.ib") +(final_presence_set "2_0mib." "2_0mi.b") +(final_presence_set "2_1mi.b" "2_1m.ib") +(final_presence_set "2_1mib." "2_1mi.b") + +(final_presence_set "2_0mm.b" "2_0m.mb") +(final_presence_set "2_0mmb." "2_0mm.b") +(final_presence_set "2_1mm.b" "2_1m.mb") +(final_presence_set "2_1mmb." "2_1mm.b") + +(final_presence_set "2_0mf.b" "2_0m.fb") +(final_presence_set "2_0mfb." "2_0mf.b") +(final_presence_set "2_1mf.b" "2_1m.fb") +(final_presence_set "2_1mfb." "2_1mf.b") + +(final_presence_set "2_0mlx." "2_0m.lx") +(final_presence_set "2_1mlx." "2_1m.lx") + +;; The following reflects the dual issue bundle types table. +;; We could place all possible combinations here because impossible +;; combinations would go away by the subsequent constrains. +(final_presence_set + "2_1m.lx" + "2_0mmi.,2_0mfi.,2_0mmf.,2_0mib.,2_0mmb.,2_0mfb.,2_0mlx.") +(final_presence_set "2_1b.bb" "2_0mii.,2_0mmi.,2_0mfi.,2_0mmf.,2_0mlx.") +(final_presence_set + "2_1m.ii,2_1m.mi,2_1m.fi,2_1m.mf,2_1m.bb,2_1m.ib,2_1m.mb,2_1m.fb" + "2_0mii.,2_0mmi.,2_0mfi.,2_0mmf.,2_0mib.,2_0mmb.,2_0mfb.,2_0mlx.") + +;; Ports/units (nb means nop.b insn issued into given port): +(define_cpu_unit + "2_um0, 2_um1, 2_um2, 2_um3, 2_ui0, 2_ui1, 2_uf0, 2_uf1,\ + 2_ub0, 2_ub1, 2_ub2, 2_unb0, 2_unb1, 2_unb2" "two") + +(exclusion_set "2_ub0" "2_unb0") +(exclusion_set "2_ub1" "2_unb1") +(exclusion_set "2_ub2" "2_unb2") + +;; The following rules are used to decrease number of alternatives. +;; They are consequences of Itanium2 microarchitecture. They also +;; describe the following rules mentioned in Itanium2 +;; microarchitecture: rules mentioned in Itanium2 microarchitecture: +;; o "BBB/MBB: Always splits issue after either of these bundles". +;; o "MIB BBB: Split issue after the first bundle in this pair". +(exclusion_set + "2_0b.bb,2_0bb.b,2_0bbb.,2_0m.bb,2_0mb.b,2_0mbb." 
+ "2_1m.ii,2_1m.mi,2_1m.fi,2_1m.mf,2_1b.bb,2_1m.bb,\ + 2_1m.ib,2_1m.mb,2_1m.fb,2_1m.lx") +(exclusion_set "2_0m.ib,2_0mi.b,2_0mib." "2_1b.bb") + +;;; "MIB/MFB/MMB: Splits issue after any of these bundles unless the +;;; B-slot contains a nop.b or a brp instruction". +;;; "The B in an MIB/MFB/MMB bundle disperses to B0 if it is a brp or +;;; nop.b, otherwise it disperses to B2". +(final_absence_set + "2_1m.ii, 2_1m.mi, 2_1m.fi, 2_1m.mf, 2_1b.bb, 2_1m.bb,\ + 2_1m.ib, 2_1m.mb, 2_1m.fb, 2_1m.lx" + "2_0mib. 2_ub2, 2_0mfb. 2_ub2, 2_0mmb. 2_ub2") + +;; This is necessary to start new processor cycle when we meet stop bit. +(define_cpu_unit "2_stop" "two") +(final_absence_set + "2_0m.ii,2_0mi.i,2_0mii.,2_0m.mi,2_0mm.i,2_0mmi.,2_0m.fi,2_0mf.i,2_0mfi.,\ + 2_0m.mf,2_0mm.f,2_0mmf.,2_0b.bb,2_0bb.b,2_0bbb.,2_0m.bb,2_0mb.b,2_0mbb.,\ + 2_0m.ib,2_0mi.b,2_0mib.,2_0m.mb,2_0mm.b,2_0mmb.,2_0m.fb,2_0mf.b,2_0mfb.,\ + 2_0m.lx,2_0mlx., \ + 2_1m.ii,2_1mi.i,2_1mii.,2_1m.mi,2_1mm.i,2_1mmi.,2_1m.fi,2_1mf.i,2_1mfi.,\ + 2_1m.mf,2_1mm.f,2_1mmf.,2_1b.bb,2_1bb.b,2_1bbb.,2_1m.bb,2_1mb.b,2_1mbb.,\ + 2_1m.ib,2_1mi.b,2_1mib.,2_1m.mb,2_1mm.b,2_1mmb.,2_1m.fb,2_1mf.b,2_1mfb.,\ + 2_1m.lx,2_1mlx." + "2_stop") + +;; The issue logic can reorder M slot insns between different subtypes +;; but can not reorder insn within the same subtypes. The following +;; constraint is enough to describe this. +(final_presence_set "2_um1" "2_um0") +(final_presence_set "2_um3" "2_um2") + +;; The insn in the 1st I slot of the two bundle issue group will issue +;; to I0. The second I slot insn will issue to I1. 
+(final_presence_set "2_ui1" "2_ui0") + +;; For exceptions of I insns: +(define_cpu_unit "2_only_ui0" "two") +(final_absence_set "2_only_ui0" "2_ui1") + +;; Insns + +;; 2_M0 reserves an M slot of either bundle together with an M unit. +;; 2_M1 reserves the 3rd slot of the first bundle (with an I, F or nop.b +;; unit), the 1st slot of the second bundle and an M unit. +(define_reservation "2_M0" + "(2_0m.ii|2_0m.mi|2_0m.fi|2_0m.mf|2_0m.bb|2_0m.ib|2_0m.mb|2_0m.fb|2_0m.lx\ + |2_1m.ii|2_1m.mi|2_1m.fi|2_1m.mf|2_1m.bb|2_1m.ib|2_1m.mb|2_1m.fb|2_1m.lx\ + |2_0mm.i|2_0mm.f|2_0mm.b|2_1mm.i|2_1mm.f|2_1mm.b)\ + +(2_um0|2_um1|2_um2|2_um3)") + +(define_reservation "2_M1" + "(2_0mii.+(2_ui0|2_ui1)|2_0mmi.+2_ui0|2_0mfi.+2_ui0|2_0mmf.+2_uf0\ + |2_0mib.+2_unb0|2_0mfb.+2_unb0|2_0mmb.+2_unb0)\ + +(2_1m.ii|2_1m.mi|2_1m.fi|2_1m.mf|2_1m.bb|2_1m.ib|2_1m.mb|2_1m.fb|2_1m.lx)\ + +(2_um0|2_um1|2_um2|2_um3)") + +(define_reservation "2_M" "2_M0|2_M1") + +;; The 2_M*_only_umN reservations below repeat 2_M0/2_M1 with the choice +;; of M unit restricted to the unit(s) named in the suffix. +(define_reservation "2_M0_only_um0" + "(2_0m.ii|2_0m.mi|2_0m.fi|2_0m.mf|2_0m.bb|2_0m.ib|2_0m.mb|2_0m.fb|2_0m.lx\ + |2_1m.ii|2_1m.mi|2_1m.fi|2_1m.mf|2_1m.bb|2_1m.ib|2_1m.mb|2_1m.fb|2_1m.lx\ + |2_0mm.i|2_0mm.f|2_0mm.b|2_1mm.i|2_1mm.f|2_1mm.b)\ + +2_um0") + +(define_reservation "2_M1_only_um0" + "(2_0mii.+(2_ui0|2_ui1)|2_0mmi.+2_ui0|2_0mfi.+2_ui0|2_0mmf.+2_uf0\ + |2_0mib.+2_unb0|2_0mfb.+2_unb0|2_0mmb.+2_unb0)\ + +(2_1m.ii|2_1m.mi|2_1m.fi|2_1m.mf|2_1m.bb|2_1m.ib|2_1m.mb|2_1m.fb|2_1m.lx)\ + +2_um0") + +(define_reservation "2_M_only_um0" "2_M0_only_um0|2_M1_only_um0") + +(define_reservation "2_M0_only_um2" + "(2_0m.ii|2_0m.mi|2_0m.fi|2_0m.mf|2_0m.bb|2_0m.ib|2_0m.mb|2_0m.fb|2_0m.lx\ + |2_1m.ii|2_1m.mi|2_1m.fi|2_1m.mf|2_1m.bb|2_1m.ib|2_1m.mb|2_1m.fb|2_1m.lx\ + |2_0mm.i|2_0mm.f|2_0mm.b|2_1mm.i|2_1mm.f|2_1mm.b)\ + +2_um2") + +(define_reservation "2_M1_only_um2" + "(2_0mii.+(2_ui0|2_ui1)|2_0mmi.+2_ui0|2_0mfi.+2_ui0|2_0mmf.+2_uf0\ + |2_0mib.+2_unb0|2_0mfb.+2_unb0|2_0mmb.+2_unb0)\ + +(2_1m.ii|2_1m.mi|2_1m.fi|2_1m.mf|2_1m.bb|2_1m.ib|2_1m.mb|2_1m.fb|2_1m.lx)\ + +2_um2") + +(define_reservation "2_M_only_um2" "2_M0_only_um2|2_M1_only_um2") + +(define_reservation "2_M0_only_um23" + "(2_0m.ii|2_0m.mi|2_0m.fi|2_0m.mf|2_0m.bb|2_0m.ib|2_0m.mb|2_0m.fb|2_0m.lx\ + 
|2_1m.ii|2_1m.mi|2_1m.fi|2_1m.mf|2_1m.bb|2_1m.ib|2_1m.mb|2_1m.fb|2_1m.lx\ + |2_0mm.i|2_0mm.f|2_0mm.b|2_1mm.i|2_1mm.f|2_1mm.b)\ + +(2_um2|2_um3)") + +(define_reservation "2_M1_only_um23" + "(2_0mii.+(2_ui0|2_ui1)|2_0mmi.+2_ui0|2_0mfi.+2_ui0|2_0mmf.+2_uf0\ + |2_0mib.+2_unb0|2_0mfb.+2_unb0|2_0mmb.+2_unb0)\ + +(2_1m.ii|2_1m.mi|2_1m.fi|2_1m.mf|2_1m.bb|2_1m.ib|2_1m.mb|2_1m.fb|2_1m.lx)\ + +(2_um2|2_um3)") + +(define_reservation "2_M_only_um23" "2_M0_only_um23|2_M1_only_um23") + +(define_reservation "2_M0_only_um01" + "(2_0m.ii|2_0m.mi|2_0m.fi|2_0m.mf|2_0m.bb|2_0m.ib|2_0m.mb|2_0m.fb|2_0m.lx\ + |2_1m.ii|2_1m.mi|2_1m.fi|2_1m.mf|2_1m.bb|2_1m.ib|2_1m.mb|2_1m.fb|2_1m.lx\ + |2_0mm.i|2_0mm.f|2_0mm.b|2_1mm.i|2_1mm.f|2_1mm.b)\ + +(2_um0|2_um1)") + +(define_reservation "2_M1_only_um01" + "(2_0mii.+(2_ui0|2_ui1)|2_0mmi.+2_ui0|2_0mfi.+2_ui0|2_0mmf.+2_uf0\ + |2_0mib.+2_unb0|2_0mfb.+2_unb0|2_0mmb.+2_unb0)\ + +(2_1m.ii|2_1m.mi|2_1m.fi|2_1m.mf|2_1m.bb|2_1m.ib|2_1m.mb|2_1m.fb|2_1m.lx)\ + +(2_um0|2_um1)") + +(define_reservation "2_M_only_um01" "2_M0_only_um01|2_M1_only_um01") + +;; I instruction is dispersed to the lowest numbered I unit +;; not already in use. Remember about possible splitting. +;; 2_I0 reserves only the I slot and an I unit; 2_I1 also reserves the +;; slot preceding the I slot together with an M or F unit for it. +(define_reservation "2_I0" + "2_0mi.i+2_ui0|2_0mii.+(2_ui0|2_ui1)|2_0mmi.+2_ui0\ + |2_0mfi.+2_ui0|2_0mi.b+2_ui0|(2_1mi.i|2_1mi.b)+(2_ui0|2_ui1)\ + |(2_1mii.|2_1mmi.|2_1mfi.)+(2_ui0|2_ui1)") + +(define_reservation "2_I1" + "2_0m.ii+(2_um0|2_um1|2_um2|2_um3)+2_0mi.i+2_ui0\ + |2_0mm.i+(2_um0|2_um1|2_um2|2_um3)+2_0mmi.+2_ui0\ + |2_0mf.i+2_uf0+2_0mfi.+2_ui0\ + |2_0m.ib+(2_um0|2_um1|2_um2|2_um3)+2_0mi.b+2_ui0\ + |(2_1m.ii+2_1mi.i|2_1m.ib+2_1mi.b)+(2_um0|2_um1|2_um2|2_um3)+(2_ui0|2_ui1)\ + |2_1mm.i+(2_um0|2_um1|2_um2|2_um3)+2_1mmi.+(2_ui0|2_ui1)\ + |2_1mf.i+2_uf1+2_1mfi.+(2_ui0|2_ui1)") + +(define_reservation "2_I" "2_I0|2_I1") + +;; "An F slot in the 1st bundle disperses to F0". +;; "An F slot in the 2nd bundle disperses to F1". 
+(define_reservation "2_F0" + "2_0mf.i+2_uf0|2_0mmf.+2_uf0|2_0mf.b+2_uf0\ + |2_1mf.i+2_uf1|2_1mmf.+2_uf1|2_1mf.b+2_uf1") + +(define_reservation "2_F1" + "(2_0m.fi+2_0mf.i|2_0mm.f+2_0mmf.|2_0m.fb+2_0mf.b)\ + +(2_um0|2_um1|2_um2|2_um3)+2_uf0\ + |(2_1m.fi+2_1mf.i|2_1mm.f+2_1mmf.|2_1m.fb+2_1mf.b)\ + +(2_um0|2_um1|2_um2|2_um3)+2_uf1") + +;; NOTE(review): in the second group below 2_0mmf. is paired with an M +;; port, whereas 2_B1 pairs 2_0mmf. with 2_uf0 -- confirm this is intended. +(define_reservation "2_F2" + "(2_0m.mf+2_0mm.f+2_0mmf.+2_uf0|2_1m.mf+2_1mm.f+2_1mmf.+2_uf1)\ + +(2_um0|2_um1|2_um2|2_um3)+(2_um0|2_um1|2_um2|2_um3)\ + |(2_0mii.+(2_ui0|2_ui1)|2_0mmi.+2_ui0|2_0mfi.+2_ui0\ + |2_0mmf.+(2_um0|2_um1|2_um2|2_um3)\ + |2_0mib.+2_unb0|2_0mmb.+2_unb0|2_0mfb.+2_unb0)\ + +(2_1m.fi+2_1mf.i|2_1m.fb+2_1mf.b)+(2_um0|2_um1|2_um2|2_um3)+2_uf1") + +(define_reservation "2_F" "2_F0|2_F1|2_F2") + +;;; "Each B slot in MBB or BBB bundle disperses to the corresponding B +;;; unit. That is, a B slot in 1st position is dispersed to B0. In the +;;; 2nd position it is dispersed to B2". +(define_reservation "2_NB" + "2_0b.bb+2_unb0|2_0bb.b+2_unb1|2_0bbb.+2_unb2\ + |2_0mb.b+2_unb1|2_0mbb.+2_unb2|2_0mib.+2_unb0\ + |2_0mmb.+2_unb0|2_0mfb.+2_unb0\ + |2_1b.bb+2_unb0|2_1bb.b+2_unb1 + |2_1bbb.+2_unb2|2_1mb.b+2_unb1|2_1mbb.+2_unb2\ + |2_1mib.+2_unb0|2_1mmb.+2_unb0|2_1mfb.+2_unb0") + +;; NOTE(review): 2_B0 has no 2_0mmb.+2_ub2 and no 2_1mbb.+2_ub2 +;; alternative although the sibling units 2_1mmb. and 2_0mbb. are listed +;; -- confirm this is intended. +(define_reservation "2_B0" + "2_0b.bb+2_ub0|2_0bb.b+2_ub1|2_0bbb.+2_ub2\ + |2_0mb.b+2_ub1|2_0mbb.+2_ub2|2_0mib.+2_ub2\ + |2_0mfb.+2_ub2|2_1b.bb+2_ub0|2_1bb.b+2_ub1\ + |2_1bbb.+2_ub2|2_1mb.b+2_ub1\ + |2_1mib.+2_ub2|2_1mmb.+2_ub2|2_1mfb.+2_ub2") + +(define_reservation "2_B1" + "2_0m.bb+(2_um0|2_um1|2_um2|2_um3)+2_0mb.b+2_ub1\ + |2_0mi.b+2_ui0+2_0mib.+2_ub2\ + |2_0mm.b+(2_um0|2_um1|2_um2|2_um3)+2_0mmb.+2_ub2\ + |2_0mf.b+2_uf0+2_0mfb.+2_ub2\ + |(2_0mii.+(2_ui0|2_ui1)|2_0mmi.+2_ui0|2_0mfi.+2_ui0|2_0mmf.+2_uf0)\ + +2_1b.bb+2_ub0\ + |2_1m.bb+(2_um0|2_um1|2_um2|2_um3)+2_1mb.b+2_ub1\ + |2_1mi.b+(2_ui0|2_ui1)+2_1mib.+2_ub2\ + |2_1mm.b+(2_um0|2_um1|2_um2|2_um3)+2_1mmb.+2_ub2\ + |2_1mf.b+2_uf1+2_1mfb.+2_ub2") + +(define_reservation "2_B" "2_B0|2_B1") + +;; MLX bundle uses 
ports equivalent to MFI bundles. + +;; For the MLI template, the I slot insn is always assigned to port I0 +;; if it is in the first bundle or it is assigned to port I1 if it is in +;; the second bundle. +(define_reservation "2_L0" "2_0mlx.+2_ui0+2_uf0|2_1mlx.+2_ui1+2_uf1") + +(define_reservation "2_L1" + "2_0m.lx+(2_um0|2_um1|2_um2|2_um3)+2_0mlx.+2_ui0+2_uf0\ + |2_1m.lx+(2_um0|2_um1|2_um2|2_um3)+2_1mlx.+2_ui1+2_uf1") + +(define_reservation "2_L2" + "(2_0mii.+(2_ui0|2_ui1)|2_0mmi.+2_ui0|2_0mfi.+2_ui0|2_0mmf.+2_uf0\ + |2_0mib.+2_unb0|2_0mmb.+2_unb0|2_0mfb.+2_unb0) + +2_1m.lx+(2_um0|2_um1|2_um2|2_um3)+2_1mlx.+2_ui1+2_uf1") + +(define_reservation "2_L" "2_L0|2_L1|2_L2") + +;; Should we describe that A insn in I slot can be issued into M +;; ports? I think it is not necessary because of multipass +;; scheduling. For example, the multipass scheduling could use +;; MMI-MMI instead of MII-MII where the two last I slots contain A +;; insns (even if the case is complicated by use-def conflicts). +;; +;; In any case we could describe it as +;; (define_cpu_unit "2_ui1_0pres,2_ui1_1pres,2_ui1_2pres,2_ui1_3pres" "two") +;; (final_presence_set "2_ui1_0pres,2_ui1_1pres,2_ui1_2pres,2_ui1_3pres" +;; "2_ui1") +;; (define_reservation "b_A" +;; "b_M|b_I\ +;; |(2_1mi.i|2_1mii.|2_1mmi.|2_1mfi.|2_1mi.b)+(2_um0|2_um1|2_um2|2_um3)\ +;; +(2_ui1_0pres|2_ui1_1pres|2_ui1_2pres|2_ui1_3pres)") + +(define_reservation "2_A" "2_M|2_I") + +;; We assume that there is no insn issued on the same cycle as the +;; unknown insn. 
+(define_cpu_unit "2_empty" "two") +(exclusion_set "2_empty" + "2_0m.ii,2_0m.mi,2_0m.fi,2_0m.mf,2_0b.bb,2_0m.bb,2_0m.ib,2_0m.mb,2_0m.fb,\ + 2_0m.lx") + +(define_cpu_unit + "2_0m_bs, 2_0mi_bs, 2_0mm_bs, 2_0mf_bs, 2_0b_bs, 2_0bb_bs, 2_0mb_bs" + "two") +(define_cpu_unit + "2_1m_bs, 2_1mi_bs, 2_1mm_bs, 2_1mf_bs, 2_1b_bs, 2_1bb_bs, 2_1mb_bs" + "two") + +(define_cpu_unit "2_m_cont, 2_mi_cont, 2_mm_cont, 2_mf_cont, 2_mb_cont,\ + 2_b_cont, 2_bb_cont" "two") + +;; For stop in the middle of the bundles. +(define_cpu_unit "2_m_stop, 2_m0_stop, 2_m1_stop, 2_0mmi_cont" "two") +(define_cpu_unit "2_mi_stop, 2_mi0_stop, 2_mi1_stop, 2_0mii_cont" "two") + +;; Each *_bs unit requires one of the bundle-position units sharing its +;; prefix (final_presence_set) and excludes the units for the position +;; that follows (exclusion_set). The "mi" prefix has two position units, +;; 2_Nmi.i and 2_Nmi.b, so both are listed for 2_Nmi_bs. +(final_presence_set "2_0m_bs" + "2_0m.ii, 2_0m.mi, 2_0m.mf, 2_0m.fi, 2_0m.bb,\ + 2_0m.ib, 2_0m.fb, 2_0m.mb, 2_0m.lx") +(final_presence_set "2_1m_bs" + "2_1m.ii, 2_1m.mi, 2_1m.mf, 2_1m.fi, 2_1m.bb,\ + 2_1m.ib, 2_1m.fb, 2_1m.mb, 2_1m.lx") +(final_presence_set "2_0mi_bs" "2_0mi.i, 2_0mi.b") +(final_presence_set "2_1mi_bs" "2_1mi.i, 2_1mi.b") +(final_presence_set "2_0mm_bs" "2_0mm.i, 2_0mm.f, 2_0mm.b") +(final_presence_set "2_1mm_bs" "2_1mm.i, 2_1mm.f, 2_1mm.b") +(final_presence_set "2_0mf_bs" "2_0mf.i, 2_0mf.b") +(final_presence_set "2_1mf_bs" "2_1mf.i, 2_1mf.b") +(final_presence_set "2_0b_bs" "2_0b.bb") +(final_presence_set "2_1b_bs" "2_1b.bb") +(final_presence_set "2_0bb_bs" "2_0bb.b") +(final_presence_set "2_1bb_bs" "2_1bb.b") +(final_presence_set "2_0mb_bs" "2_0mb.b") +(final_presence_set "2_1mb_bs" "2_1mb.b") + +(exclusion_set "2_0m_bs" + "2_0mi.i, 2_0mm.i, 2_0mm.f, 2_0mf.i, 2_0mb.b,\ + 2_0mi.b, 2_0mf.b, 2_0mm.b, 2_0mlx., 2_m0_stop") +(exclusion_set "2_1m_bs" + "2_1mi.i, 2_1mm.i, 2_1mm.f, 2_1mf.i, 2_1mb.b,\ + 2_1mi.b, 2_1mf.b, 2_1mm.b, 2_1mlx., 2_m1_stop") +(exclusion_set "2_0mi_bs" "2_0mii., 2_0mib., 2_mi0_stop") +(exclusion_set "2_1mi_bs" "2_1mii., 2_1mib., 2_mi1_stop") +(exclusion_set "2_0mm_bs" "2_0mmi., 2_0mmf., 2_0mmb.") +(exclusion_set "2_1mm_bs" "2_1mmi., 2_1mmf., 2_1mmb.") +(exclusion_set "2_0mf_bs" "2_0mfi., 2_0mfb.") 
+(exclusion_set "2_1mf_bs" "2_1mfi., 2_1mfb.") +(exclusion_set "2_0b_bs" "2_0bb.b") +(exclusion_set "2_1b_bs" "2_1bb.b") +(exclusion_set "2_0bb_bs" "2_0bbb.") +(exclusion_set "2_1bb_bs" "2_1bbb.") +(exclusion_set "2_0mb_bs" "2_0mbb.") +(exclusion_set "2_1mb_bs" "2_1mbb.") + +(exclusion_set + "2_0m_bs, 2_0mi_bs, 2_0mm_bs, 2_0mf_bs, 2_0b_bs, 2_0bb_bs, 2_0mb_bs, + 2_1m_bs, 2_1mi_bs, 2_1mm_bs, 2_1mf_bs, 2_1b_bs, 2_1bb_bs, 2_1mb_bs" + "2_stop") + +(final_presence_set + "2_0mi.i, 2_0mm.i, 2_0mf.i, 2_0mm.f, 2_0mb.b,\ + 2_0mi.b, 2_0mm.b, 2_0mf.b, 2_0mlx." + "2_m_cont") +(final_presence_set "2_0mii., 2_0mib." "2_mi_cont") +(final_presence_set "2_0mmi., 2_0mmf., 2_0mmb." "2_mm_cont") +(final_presence_set "2_0mfi., 2_0mfb." "2_mf_cont") +(final_presence_set "2_0bb.b" "2_b_cont") +(final_presence_set "2_0bbb." "2_bb_cont") +(final_presence_set "2_0mbb." "2_mb_cont") + +(exclusion_set + "2_0m.ii, 2_0m.mi, 2_0m.fi, 2_0m.mf, 2_0b.bb, 2_0m.bb,\ + 2_0m.ib, 2_0m.mb, 2_0m.fb, 2_0m.lx" + "2_m_cont, 2_mi_cont, 2_mm_cont, 2_mf_cont,\ + 2_mb_cont, 2_b_cont, 2_bb_cont") + +(exclusion_set "2_empty" + "2_m_cont,2_mi_cont,2_mm_cont,2_mf_cont,\ + 2_mb_cont,2_b_cont,2_bb_cont") + +;; For m;mi bundle +(final_presence_set "2_m0_stop" "2_0m.mi") +(final_presence_set "2_0mm.i" "2_0mmi_cont") +(exclusion_set "2_0mmi_cont" + "2_0m.ii, 2_0m.mi, 2_0m.fi, 2_0m.mf, 2_0b.bb, 2_0m.bb,\ + 2_0m.ib, 2_0m.mb, 2_0m.fb, 2_0m.lx") +(exclusion_set "2_m0_stop" "2_0mm.i") +(final_presence_set "2_m1_stop" "2_1m.mi") +(exclusion_set "2_m1_stop" "2_1mm.i") +(final_presence_set "2_m_stop" "2_m0_stop, 2_m1_stop") + +;; For mi;i bundle +(final_presence_set "2_mi0_stop" "2_0mi.i") +(final_presence_set "2_0mii." 
"2_0mii_cont") +(exclusion_set "2_0mii_cont" + "2_0m.ii, 2_0m.mi, 2_0m.fi, 2_0m.mf, 2_0b.bb, 2_0m.bb,\ + 2_0m.ib, 2_0m.mb, 2_0m.fb, 2_0m.lx") +(exclusion_set "2_mi0_stop" "2_0mii.") +(final_presence_set "2_mi1_stop" "2_1mi.i") +(exclusion_set "2_mi1_stop" "2_1mii.") +(final_presence_set "2_mi_stop" "2_mi0_stop, 2_mi1_stop") + +(final_absence_set + "2_0m.ii,2_0mi.i,2_0mii.,2_0m.mi,2_0mm.i,2_0mmi.,2_0m.fi,2_0mf.i,2_0mfi.,\ + 2_0m.mf,2_0mm.f,2_0mmf.,2_0b.bb,2_0bb.b,2_0bbb.,2_0m.bb,2_0mb.b,2_0mbb.,\ + 2_0m.ib,2_0mi.b,2_0mib.,2_0m.mb,2_0mm.b,2_0mmb.,2_0m.fb,2_0mf.b,2_0mfb.,\ + 2_0m.lx,2_0mlx., \ + 2_1m.ii,2_1mi.i,2_1mii.,2_1m.mi,2_1mm.i,2_1mmi.,2_1m.fi,2_1mf.i,2_1mfi.,\ + 2_1m.mf,2_1mm.f,2_1mmf.,2_1b.bb,2_1bb.b,2_1bbb.,2_1m.bb,2_1mb.b,2_1mbb.,\ + 2_1m.ib,2_1mi.b,2_1mib.,2_1m.mb,2_1mm.b,2_1mmb.,2_1m.fb,2_1mf.b,2_1mfb.,\ + 2_1m.lx,2_1mlx." + "2_m0_stop,2_m1_stop,2_mi0_stop,2_mi1_stop") + +;; Insn reservations used for cpu "itanium2" when bundling_p is zero. The +;; number after the reservation name is the default latency. +(define_insn_reservation "2_stop_bit" 0 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "stop_bit")) + (eq (symbol_ref "bundling_p") (const_int 0))) + "2_stop|2_m0_stop|2_m1_stop|2_mi0_stop|2_mi1_stop") + +(define_insn_reservation "2_br" 0 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "br")) + (eq (symbol_ref "bundling_p") (const_int 0))) "2_B") +(define_insn_reservation "2_scall" 0 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "scall")) + (eq (symbol_ref "bundling_p") (const_int 0))) "2_B") +(define_insn_reservation "2_fcmp" 2 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "fcmp")) + (eq (symbol_ref "bundling_p") (const_int 0))) "2_F") +(define_insn_reservation "2_fcvtfx" 4 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "fcvtfx")) + (eq (symbol_ref "bundling_p") (const_int 0))) "2_F") +(define_insn_reservation "2_fld" 6 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "fld")) + (eq (symbol_ref "bundling_p") (const_int 0))) "2_M") +(define_insn_reservation "2_fmac" 4 + (and 
(and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "fmac")) + (eq (symbol_ref "bundling_p") (const_int 0))) "2_F") +(define_insn_reservation "2_fmisc" 4 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "fmisc")) + (eq (symbol_ref "bundling_p") (const_int 0))) "2_F") + +;; There is only one insn `mov = ar.bsp' for frar_i: +;; Latency time ??? +(define_insn_reservation "2_frar_i" 13 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "frar_i")) + (eq (symbol_ref "bundling_p") (const_int 0))) + "2_I+2_only_ui0") +;; There are only two insns `mov = ar.unat' or `mov = ar.ccv' for frar_m: +;; Latency time ??? +(define_insn_reservation "2_frar_m" 6 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "frar_m")) + (eq (symbol_ref "bundling_p") (const_int 0))) + "2_M_only_um2") +(define_insn_reservation "2_frbr" 2 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "frbr")) + (eq (symbol_ref "bundling_p") (const_int 0))) + "2_I+2_only_ui0") +(define_insn_reservation "2_frfr" 5 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "frfr")) + (eq (symbol_ref "bundling_p") (const_int 0))) + "2_M_only_um2") +(define_insn_reservation "2_frpr" 2 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "frpr")) + (eq (symbol_ref "bundling_p") (const_int 0))) + "2_I+2_only_ui0") + +(define_insn_reservation "2_ialu" 1 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "ialu")) + (eq (symbol_ref "bundling_p") (const_int 0))) + "2_A") +(define_insn_reservation "2_icmp" 1 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "icmp")) + (eq (symbol_ref "bundling_p") (const_int 0))) "2_A") +(define_insn_reservation "2_ilog" 1 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "ilog")) + (eq (symbol_ref "bundling_p") (const_int 0))) "2_A") +;; Latency time ??? 
+(define_insn_reservation "2_ishf" 1 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "ishf")) + (eq (symbol_ref "bundling_p") (const_int 0))) + "2_I+2_only_ui0") +;; 2_ld has default latency 1; the define_bypass entries for "2_ld" give +;; latency 2 to some consumers. +(define_insn_reservation "2_ld" 1 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "ld")) + (eq (symbol_ref "bundling_p") (const_int 0))) + "2_M_only_um01") +(define_insn_reservation "2_long_i" 1 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "long_i")) + (eq (symbol_ref "bundling_p") (const_int 0))) "2_L") + +(define_insn_reservation "2_mmmul" 2 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "mmmul")) + (eq (symbol_ref "bundling_p") (const_int 0))) + "2_I+2_only_ui0") +;; Latency time ??? +(define_insn_reservation "2_mmshf" 2 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "mmshf")) + (eq (symbol_ref "bundling_p") (const_int 0))) "2_I") +;; Latency time ??? +(define_insn_reservation "2_mmshfi" 1 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "mmshfi")) + (eq (symbol_ref "bundling_p") (const_int 0))) "2_I") + +;; Now we have only one insn (flushrs) of such class. We assume that flushrs +;; is the 1st syllable of the bundle after stop bit. 
+(define_insn_reservation "2_rse_m" 0 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "rse_m")) + (eq (symbol_ref "bundling_p") (const_int 0))) + "(2_0m.ii|2_0m.mi|2_0m.fi|2_0m.mf|2_0m.bb\ + |2_0m.ib|2_0m.mb|2_0m.fb|2_0m.lx)+2_um0") +(define_insn_reservation "2_sem" 0 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "sem")) + (eq (symbol_ref "bundling_p") (const_int 0))) + "2_M_only_um23") + +(define_insn_reservation "2_stf" 1 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "stf")) + (eq (symbol_ref "bundling_p") (const_int 0))) + "2_M_only_um23") +(define_insn_reservation "2_st" 1 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "st")) + (eq (symbol_ref "bundling_p") (const_int 0))) + "2_M_only_um23") +(define_insn_reservation "2_syst_m0" 0 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "syst_m0")) + (eq (symbol_ref "bundling_p") (const_int 0))) + "2_M_only_um2") +(define_insn_reservation "2_syst_m" 0 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "syst_m")) + (eq (symbol_ref "bundling_p") (const_int 0))) + "2_M_only_um0") +;; Reservation??? +(define_insn_reservation "2_tbit" 1 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "tbit")) + (eq (symbol_ref "bundling_p") (const_int 0))) + "2_I+2_only_ui0") + +;; There is only one insn `mov ar.pfs =' for toar_i: +(define_insn_reservation "2_toar_i" 0 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "toar_i")) + (eq (symbol_ref "bundling_p") (const_int 0))) + "2_I+2_only_ui0") +;; There are only 2 insns `mov ar.ccv =' and `mov ar.unat =' for toar_m: +;; Latency time ??? +(define_insn_reservation "2_toar_m" 5 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "toar_m")) + (eq (symbol_ref "bundling_p") (const_int 0))) + "2_M_only_um2") +;; Latency time ??? 
+(define_insn_reservation "2_tobr" 1 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "tobr")) + (eq (symbol_ref "bundling_p") (const_int 0))) + "2_I+2_only_ui0") +(define_insn_reservation "2_tofr" 5 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "tofr")) + (eq (symbol_ref "bundling_p") (const_int 0))) + "2_M_only_um23") +;; Latency time ??? +(define_insn_reservation "2_topr" 1 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "topr")) + (eq (symbol_ref "bundling_p") (const_int 0))) + "2_I+2_only_ui0") + +(define_insn_reservation "2_xmpy" 4 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "xmpy")) + (eq (symbol_ref "bundling_p") (const_int 0))) "2_F") +;; Latency time ??? +(define_insn_reservation "2_xtd" 1 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "xtd")) + (eq (symbol_ref "bundling_p") (const_int 0))) "2_I") + +(define_insn_reservation "2_chk_s" 0 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "chk_s")) + (eq (symbol_ref "bundling_p") (const_int 0))) + "2_I|2_M_only_um23") +(define_insn_reservation "2_lfetch" 0 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "lfetch")) + (eq (symbol_ref "bundling_p") (const_int 0))) + "2_M_only_um01") + +;; The nop classes use 2_NB or the "0" reservation variants (2_M0, 2_I0, +;; 2_F0, 2_L0). +(define_insn_reservation "2_nop_m" 0 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "nop_m")) + (eq (symbol_ref "bundling_p") (const_int 0))) "2_M0") +(define_insn_reservation "2_nop_b" 0 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "nop_b")) + (eq (symbol_ref "bundling_p") (const_int 0))) "2_NB") +(define_insn_reservation "2_nop_i" 0 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "nop_i")) + (eq (symbol_ref "bundling_p") (const_int 0))) "2_I0") +(define_insn_reservation "2_nop_f" 0 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "nop_f")) + (eq (symbol_ref "bundling_p") (const_int 0))) "2_F0") +(define_insn_reservation 
"2_nop_x" 0 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "nop_x")) + (eq (symbol_ref "bundling_p") (const_int 0))) "2_L0") + +(define_insn_reservation "2_unknown" 1 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "unknown")) + (eq (symbol_ref "bundling_p") (const_int 0))) "2_empty") + +(define_insn_reservation "2_nop" 0 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "nop")) + (eq (symbol_ref "bundling_p") (const_int 0))) + "2_M0|2_NB|2_I0|2_F0") + +(define_insn_reservation "2_ignore" 0 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "ignore")) + (eq (symbol_ref "bundling_p") (const_int 0))) "nothing") + +(define_cpu_unit "2_m_cont_only, 2_b_cont_only" "two") +(define_cpu_unit "2_mi_cont_only, 2_mm_cont_only, 2_mf_cont_only" "two") +(define_cpu_unit "2_mb_cont_only, 2_bb_cont_only" "two") + +(final_presence_set "2_m_cont_only" "2_m_cont") +(exclusion_set "2_m_cont_only" + "2_0mi.i, 2_0mm.i, 2_0mf.i, 2_0mm.f, 2_0mb.b,\ + 2_0mi.b, 2_0mm.b, 2_0mf.b, 2_0mlx.") + +(final_presence_set "2_b_cont_only" "2_b_cont") +(exclusion_set "2_b_cont_only" "2_0bb.b") + +(final_presence_set "2_mi_cont_only" "2_mi_cont") +(exclusion_set "2_mi_cont_only" "2_0mii., 2_0mib.") + +(final_presence_set "2_mm_cont_only" "2_mm_cont") +(exclusion_set "2_mm_cont_only" "2_0mmi., 2_0mmf., 2_0mmb.") + +(final_presence_set "2_mf_cont_only" "2_mf_cont") +(exclusion_set "2_mf_cont_only" "2_0mfi., 2_0mfb.") + +(final_presence_set "2_mb_cont_only" "2_mb_cont") +(exclusion_set "2_mb_cont_only" "2_0mbb.") + +(final_presence_set "2_bb_cont_only" "2_bb_cont") +(exclusion_set "2_bb_cont_only" "2_0bbb.") + +(define_insn_reservation "2_pre_cycle" 0 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "pre_cycle")) + (eq (symbol_ref "bundling_p") (const_int 0))) + "nothing") + +;; Disabled alternative definition of 2_pre_cycle, kept as a comment: +;;(define_insn_reservation "2_pre_cycle" 0 +;; (and (and (eq_attr "cpu" "itanium2") +;; (eq_attr "itanium_class" "pre_cycle")) +;; (eq (symbol_ref 
"bundling_p") (const_int 0))) +;; "(2_0m_bs, 2_m_cont) \ +;; | (2_0mi_bs, (2_mi_cont|nothing)) \ +;; | (2_0mm_bs, 2_mm_cont) \ +;; | (2_0mf_bs, (2_mf_cont|nothing)) \ +;; | (2_0b_bs, (2_b_cont|nothing)) \ +;; | (2_0bb_bs, (2_bb_cont|nothing)) \ +;; | (2_0mb_bs, (2_mb_cont|nothing)) \ +;; | (2_1m_bs, 2_m_cont) \ +;; | (2_1mi_bs, (2_mi_cont|nothing)) \ +;; | (2_1mm_bs, 2_mm_cont) \ +;; | (2_1mf_bs, (2_mf_cont|nothing)) \ +;; | (2_1b_bs, (2_b_cont|nothing)) \ +;; | (2_1bb_bs, (2_bb_cont|nothing)) \ +;; | (2_1mb_bs, (2_mb_cont|nothing)) \ +;; | (2_m_cont_only, (2_m_cont|nothing)) \ +;; | (2_b_cont_only, (2_b_cont|nothing)) \ +;; | (2_mi_cont_only, (2_mi_cont|nothing)) \ +;; | (2_mm_cont_only, (2_mm_cont|nothing)) \ +;; | (2_mf_cont_only, (2_mf_cont|nothing)) \ +;; | (2_mb_cont_only, (2_mb_cont|nothing)) \ +;; | (2_bb_cont_only, (2_bb_cont|nothing)) \ +;; | (2_m_stop, (2_0mmi_cont|nothing)) \ +;; | (2_mi_stop, (2_0mii_cont|nothing))") + +;; Bypasses: +;; (define_bypass LATENCY PRODUCERS CONSUMERS [GUARD]) replaces the default +;; latency for the listed producer/consumer reservation pairs. + +(define_bypass 1 "2_fcmp" "2_br,2_scall") +(define_bypass 0 "2_icmp" "2_br,2_scall") +(define_bypass 0 "2_tbit" "2_br,2_scall") +(define_bypass 2 "2_ld" "2_ld" "ia64_ld_address_bypass_p") +(define_bypass 2 "2_ld" "2_st" "ia64_st_address_bypass_p") +(define_bypass 2 "2_ld" "2_mmmul,2_mmshf") +(define_bypass 3 "2_ilog" "2_mmmul,2_mmshf") +(define_bypass 3 "2_ialu" "2_mmmul,2_mmshf") +(define_bypass 3 "2_mmmul,2_mmshf" "2_ialu,2_ilog,2_ishf,2_st,2_ld") +(define_bypass 6 "2_tofr" "2_frfr,2_stf") +(define_bypass 7 "2_fmac" "2_frfr,2_stf") + +;; We don't use here fcmp because scall may be predicated. 
+(define_bypass 0 "2_fcvtfx,2_fld,2_fmac,2_fmisc,2_frar_i,2_frar_m,\ + 2_frbr,2_frfr,2_frpr,2_ialu,2_ilog,2_ishf,2_ld,2_long_i,\ + 2_mmmul,2_mmshf,2_mmshfi,2_toar_m,2_tofr,2_xmpy,2_xtd" + "2_scall") + +(define_bypass 0 "2_unknown,2_ignore,2_stop_bit,2_br,2_fcmp,2_fcvtfx,2_fld,\ + 2_fmac,2_fmisc,2_frar_i,2_frar_m,2_frbr,2_frfr,2_frpr,\ + 2_ialu,2_icmp,2_ilog,2_ishf,2_ld,2_chk_s,\ + 2_long_i,2_mmmul,2_mmshf,2_mmshfi,2_nop,2_nop_b,2_nop_f,\ + 2_nop_i,2_nop_m,2_nop_x,2_rse_m,2_scall,2_sem,2_stf,2_st,\ + 2_syst_m0,2_syst_m,2_tbit,2_toar_i,2_toar_m,2_tobr,2_tofr,\ + 2_topr,2_xmpy,2_xtd,2_lfetch" "2_ignore") + + + +;; Bundling + +(define_automaton "twob") + +;; Pseudo units for quicker searching for position in two packet window. +(define_query_cpu_unit "2_1,2_2,2_3,2_4,2_5,2_6" "twob") + +;; All possible combinations of bundles/syllables +(define_cpu_unit + "2b_0m.ii, 2b_0m.mi, 2b_0m.fi, 2b_0m.mf, 2b_0b.bb, 2b_0m.bb,\ + 2b_0m.ib, 2b_0m.mb, 2b_0m.fb, 2b_0m.lx" "twob") +(define_cpu_unit + "2b_0mi.i, 2b_0mm.i, 2b_0mf.i, 2b_0mm.f, 2b_0bb.b, 2b_0mb.b,\ + 2b_0mi.b, 2b_0mm.b, 2b_0mf.b" "twob") +(define_query_cpu_unit + "2b_0mii., 2b_0mmi., 2b_0mfi., 2b_0mmf., 2b_0bbb., 2b_0mbb.,\ + 2b_0mib., 2b_0mmb., 2b_0mfb., 2b_0mlx." "twob") + +(define_cpu_unit + "2b_1m.ii, 2b_1m.mi, 2b_1m.fi, 2b_1m.mf, 2b_1b.bb, 2b_1m.bb,\ + 2b_1m.ib, 2b_1m.mb, 2b_1m.fb, 2b_1m.lx" "twob") +(define_cpu_unit + "2b_1mi.i, 2b_1mm.i, 2b_1mf.i, 2b_1mm.f, 2b_1bb.b, 2b_1mb.b,\ + 2b_1mi.b, 2b_1mm.b, 2b_1mf.b" "twob") +(define_query_cpu_unit + "2b_1mii., 2b_1mmi., 2b_1mfi., 2b_1mmf., 2b_1bbb., 2b_1mbb.,\ + 2b_1mib., 2b_1mmb., 2b_1mfb., 2b_1mlx." 
"twob") + +;; Slot 1 +;; (In each slot group every unit excludes all units listed after it, so +;; taken together the sets make the units of a slot pairwise exclusive.) +(exclusion_set "2b_0m.ii" + "2b_0m.mi, 2b_0m.fi, 2b_0m.mf, 2b_0b.bb, 2b_0m.bb,\ + 2b_0m.ib, 2b_0m.mb, 2b_0m.fb, 2b_0m.lx") +(exclusion_set "2b_0m.mi" + "2b_0m.fi, 2b_0m.mf, 2b_0b.bb, 2b_0m.bb, 2b_0m.ib,\ + 2b_0m.mb, 2b_0m.fb, 2b_0m.lx") +(exclusion_set "2b_0m.fi" + "2b_0m.mf, 2b_0b.bb, 2b_0m.bb, 2b_0m.ib, 2b_0m.mb, 2b_0m.fb, 2b_0m.lx") +(exclusion_set "2b_0m.mf" + "2b_0b.bb, 2b_0m.bb, 2b_0m.ib, 2b_0m.mb, 2b_0m.fb, 2b_0m.lx") +(exclusion_set "2b_0b.bb" "2b_0m.bb, 2b_0m.ib, 2b_0m.mb, 2b_0m.fb, 2b_0m.lx") +(exclusion_set "2b_0m.bb" "2b_0m.ib, 2b_0m.mb, 2b_0m.fb, 2b_0m.lx") +(exclusion_set "2b_0m.ib" "2b_0m.mb, 2b_0m.fb, 2b_0m.lx") +(exclusion_set "2b_0m.mb" "2b_0m.fb, 2b_0m.lx") +(exclusion_set "2b_0m.fb" "2b_0m.lx") + +;; Slot 2 +(exclusion_set "2b_0mi.i" + "2b_0mm.i, 2b_0mf.i, 2b_0mm.f, 2b_0bb.b, 2b_0mb.b,\ + 2b_0mi.b, 2b_0mm.b, 2b_0mf.b, 2b_0mlx.") +(exclusion_set "2b_0mm.i" + "2b_0mf.i, 2b_0mm.f, 2b_0bb.b, 2b_0mb.b,\ + 2b_0mi.b, 2b_0mm.b, 2b_0mf.b, 2b_0mlx.") +(exclusion_set "2b_0mf.i" + "2b_0mm.f, 2b_0bb.b, 2b_0mb.b, 2b_0mi.b, 2b_0mm.b, 2b_0mf.b, 2b_0mlx.") +(exclusion_set "2b_0mm.f" + "2b_0bb.b, 2b_0mb.b, 2b_0mi.b, 2b_0mm.b, 2b_0mf.b, 2b_0mlx.") +(exclusion_set "2b_0bb.b" "2b_0mb.b, 2b_0mi.b, 2b_0mm.b, 2b_0mf.b, 2b_0mlx.") +(exclusion_set "2b_0mb.b" "2b_0mi.b, 2b_0mm.b, 2b_0mf.b, 2b_0mlx.") +(exclusion_set "2b_0mi.b" "2b_0mm.b, 2b_0mf.b, 2b_0mlx.") +(exclusion_set "2b_0mm.b" "2b_0mf.b, 2b_0mlx.") +(exclusion_set "2b_0mf.b" "2b_0mlx.") + +;; Slot 3 +(exclusion_set "2b_0mii." + "2b_0mmi., 2b_0mfi., 2b_0mmf., 2b_0bbb., 2b_0mbb.,\ + 2b_0mib., 2b_0mmb., 2b_0mfb., 2b_0mlx.") +(exclusion_set "2b_0mmi." + "2b_0mfi., 2b_0mmf., 2b_0bbb., 2b_0mbb.,\ + 2b_0mib., 2b_0mmb., 2b_0mfb., 2b_0mlx.") +(exclusion_set "2b_0mfi." + "2b_0mmf., 2b_0bbb., 2b_0mbb., 2b_0mib., 2b_0mmb., 2b_0mfb., 2b_0mlx.") +(exclusion_set "2b_0mmf." + "2b_0bbb., 2b_0mbb., 2b_0mib., 2b_0mmb., 2b_0mfb., 2b_0mlx.") +(exclusion_set "2b_0bbb." 
"2b_0mbb., 2b_0mib., 2b_0mmb., 2b_0mfb., 2b_0mlx.") +(exclusion_set "2b_0mbb." "2b_0mib., 2b_0mmb., 2b_0mfb., 2b_0mlx.") +(exclusion_set "2b_0mib." "2b_0mmb., 2b_0mfb., 2b_0mlx.") +(exclusion_set "2b_0mmb." "2b_0mfb., 2b_0mlx.") +(exclusion_set "2b_0mfb." "2b_0mlx.") + +;; Slot 4 +(exclusion_set "2b_1m.ii" + "2b_1m.mi, 2b_1m.fi, 2b_1m.mf, 2b_1b.bb, 2b_1m.bb,\ + 2b_1m.ib, 2b_1m.mb, 2b_1m.fb, 2b_1m.lx") +(exclusion_set "2b_1m.mi" + "2b_1m.fi, 2b_1m.mf, 2b_1b.bb, 2b_1m.bb, 2b_1m.ib,\ + 2b_1m.mb, 2b_1m.fb, 2b_1m.lx") +(exclusion_set "2b_1m.fi" + "2b_1m.mf, 2b_1b.bb, 2b_1m.bb, 2b_1m.ib, 2b_1m.mb, 2b_1m.fb, 2b_1m.lx") +(exclusion_set "2b_1m.mf" + "2b_1b.bb, 2b_1m.bb, 2b_1m.ib, 2b_1m.mb, 2b_1m.fb, 2b_1m.lx") +(exclusion_set "2b_1b.bb" "2b_1m.bb, 2b_1m.ib, 2b_1m.mb, 2b_1m.fb, 2b_1m.lx") +(exclusion_set "2b_1m.bb" "2b_1m.ib, 2b_1m.mb, 2b_1m.fb, 2b_1m.lx") +(exclusion_set "2b_1m.ib" "2b_1m.mb, 2b_1m.fb, 2b_1m.lx") +(exclusion_set "2b_1m.mb" "2b_1m.fb, 2b_1m.lx") +(exclusion_set "2b_1m.fb" "2b_1m.lx") + +;; Slot 5 +(exclusion_set "2b_1mi.i" + "2b_1mm.i, 2b_1mf.i, 2b_1mm.f, 2b_1bb.b, 2b_1mb.b,\ + 2b_1mi.b, 2b_1mm.b, 2b_1mf.b, 2b_1mlx.") +(exclusion_set "2b_1mm.i" + "2b_1mf.i, 2b_1mm.f, 2b_1bb.b, 2b_1mb.b,\ + 2b_1mi.b, 2b_1mm.b, 2b_1mf.b, 2b_1mlx.") +(exclusion_set "2b_1mf.i" + "2b_1mm.f, 2b_1bb.b, 2b_1mb.b, 2b_1mi.b, 2b_1mm.b, 2b_1mf.b, 2b_1mlx.") +(exclusion_set "2b_1mm.f" + "2b_1bb.b, 2b_1mb.b, 2b_1mi.b, 2b_1mm.b, 2b_1mf.b, 2b_1mlx.") +(exclusion_set "2b_1bb.b" "2b_1mb.b, 2b_1mi.b, 2b_1mm.b, 2b_1mf.b, 2b_1mlx.") +(exclusion_set "2b_1mb.b" "2b_1mi.b, 2b_1mm.b, 2b_1mf.b, 2b_1mlx.") +(exclusion_set "2b_1mi.b" "2b_1mm.b, 2b_1mf.b, 2b_1mlx.") +(exclusion_set "2b_1mm.b" "2b_1mf.b, 2b_1mlx.") +(exclusion_set "2b_1mf.b" "2b_1mlx.") + +;; Slot 6 +(exclusion_set "2b_1mii." + "2b_1mmi., 2b_1mfi., 2b_1mmf., 2b_1bbb., 2b_1mbb.,\ + 2b_1mib., 2b_1mmb., 2b_1mfb., 2b_1mlx.") +(exclusion_set "2b_1mmi." 
+ "2b_1mfi., 2b_1mmf., 2b_1bbb., 2b_1mbb.,\ + 2b_1mib., 2b_1mmb., 2b_1mfb., 2b_1mlx.") +(exclusion_set "2b_1mfi." + "2b_1mmf., 2b_1bbb., 2b_1mbb., 2b_1mib., 2b_1mmb., 2b_1mfb., 2b_1mlx.") +(exclusion_set "2b_1mmf." + "2b_1bbb., 2b_1mbb., 2b_1mib., 2b_1mmb., 2b_1mfb., 2b_1mlx.") +(exclusion_set "2b_1bbb." "2b_1mbb., 2b_1mib., 2b_1mmb., 2b_1mfb., 2b_1mlx.") +(exclusion_set "2b_1mbb." "2b_1mib., 2b_1mmb., 2b_1mfb., 2b_1mlx.") +(exclusion_set "2b_1mib." "2b_1mmb., 2b_1mfb., 2b_1mlx.") +(exclusion_set "2b_1mmb." "2b_1mfb., 2b_1mlx.") +(exclusion_set "2b_1mfb." "2b_1mlx.") + +;; Each bundle-position unit requires the unit for the preceding position +;; of the same template. +(final_presence_set "2b_0mi.i" "2b_0m.ii") +(final_presence_set "2b_0mii." "2b_0mi.i") +(final_presence_set "2b_1mi.i" "2b_1m.ii") +(final_presence_set "2b_1mii." "2b_1mi.i") + +(final_presence_set "2b_0mm.i" "2b_0m.mi") +(final_presence_set "2b_0mmi." "2b_0mm.i") +(final_presence_set "2b_1mm.i" "2b_1m.mi") +(final_presence_set "2b_1mmi." "2b_1mm.i") + +(final_presence_set "2b_0mf.i" "2b_0m.fi") +(final_presence_set "2b_0mfi." "2b_0mf.i") +(final_presence_set "2b_1mf.i" "2b_1m.fi") +(final_presence_set "2b_1mfi." "2b_1mf.i") + +(final_presence_set "2b_0mm.f" "2b_0m.mf") +(final_presence_set "2b_0mmf." "2b_0mm.f") +(final_presence_set "2b_1mm.f" "2b_1m.mf") +(final_presence_set "2b_1mmf." "2b_1mm.f") + +(final_presence_set "2b_0bb.b" "2b_0b.bb") +(final_presence_set "2b_0bbb." "2b_0bb.b") +(final_presence_set "2b_1bb.b" "2b_1b.bb") +(final_presence_set "2b_1bbb." "2b_1bb.b") + +(final_presence_set "2b_0mb.b" "2b_0m.bb") +(final_presence_set "2b_0mbb." "2b_0mb.b") +(final_presence_set "2b_1mb.b" "2b_1m.bb") +(final_presence_set "2b_1mbb." "2b_1mb.b") + +(final_presence_set "2b_0mi.b" "2b_0m.ib") +(final_presence_set "2b_0mib." "2b_0mi.b") +(final_presence_set "2b_1mi.b" "2b_1m.ib") +(final_presence_set "2b_1mib." "2b_1mi.b") + +(final_presence_set "2b_0mm.b" "2b_0m.mb") +(final_presence_set "2b_0mmb." "2b_0mm.b") +(final_presence_set "2b_1mm.b" "2b_1m.mb") +(final_presence_set "2b_1mmb." 
"2b_1mm.b") + +(final_presence_set "2b_0mf.b" "2b_0m.fb") +(final_presence_set "2b_0mfb." "2b_0mf.b") +(final_presence_set "2b_1mf.b" "2b_1m.fb") +(final_presence_set "2b_1mfb." "2b_1mf.b") + +(final_presence_set "2b_0mlx." "2b_0m.lx") +(final_presence_set "2b_1mlx." "2b_1m.lx") + +;; See the corresponding comment in non-bundling section above. +(final_presence_set + "2b_1m.lx" + "2b_0mmi.,2b_0mfi.,2b_0mmf.,2b_0mib.,2b_0mmb.,2b_0mfb.,2b_0mlx.") +(final_presence_set "2b_1b.bb" "2b_0mii.,2b_0mmi.,2b_0mfi.,2b_0mmf.,2b_0mlx.") +(final_presence_set + "2b_1m.ii,2b_1m.mi,2b_1m.fi,2b_1m.mf,2b_1m.bb,2b_1m.ib,2b_1m.mb,2b_1m.fb" + "2b_0mii.,2b_0mmi.,2b_0mfi.,2b_0mmf.,2b_0mib.,2b_0mmb.,2b_0mfb.,2b_0mlx.") + +;; Ports/units (nb means nop.b insn issued into given port): +(define_cpu_unit + "2b_um0, 2b_um1, 2b_um2, 2b_um3, 2b_ui0, 2b_ui1, 2b_uf0, 2b_uf1,\ + 2b_ub0, 2b_ub1, 2b_ub2, 2b_unb0, 2b_unb1, 2b_unb2" "twob") + +(exclusion_set "2b_ub0" "2b_unb0") +(exclusion_set "2b_ub1" "2b_unb1") +(exclusion_set "2b_ub2" "2b_unb2") + +;; The following rules are used to decrease number of alternatives. +;; They are consequences of Itanium2 microarchitecture. They also +;; describe the following rules mentioned in Itanium2 +;; microarchitecture: +;; o "BBB/MBB: Always splits issue after either of these bundles". +;; o "MIB BBB: Split issue after the first bundle in this pair". +(exclusion_set + "2b_0b.bb,2b_0bb.b,2b_0bbb.,2b_0m.bb,2b_0mb.b,2b_0mbb." + "2b_1m.ii,2b_1m.mi,2b_1m.fi,2b_1m.mf,2b_1b.bb,2b_1m.bb,\ + 2b_1m.ib,2b_1m.mb,2b_1m.fb,2b_1m.lx") +(exclusion_set "2b_0m.ib,2b_0mi.b,2b_0mib." "2b_1b.bb") + +;;; "MIB/MFB/MMB: Splits issue after any of these bundles unless the +;;; B-slot contains a nop.b or a brp instruction". +;;; "The B in an MIB/MFB/MMB bundle disperses to B0 if it is a brp or +;;; nop.b, otherwise it disperses to B2". 
+(final_absence_set + "2b_1m.ii, 2b_1m.mi, 2b_1m.fi, 2b_1m.mf, 2b_1b.bb, 2b_1m.bb,\ + 2b_1m.ib, 2b_1m.mb, 2b_1m.fb, 2b_1m.lx" + "2b_0mib. 2b_ub2, 2b_0mfb. 2b_ub2, 2b_0mmb. 2b_ub2") + +;; This is necessary to start new processor cycle when we meet stop bit. +(define_cpu_unit "2b_stop" "twob") +(final_absence_set + "2b_0m.ii,2b_0mi.i,2b_0mii.,2b_0m.mi,2b_0mm.i,2b_0mmi.,\ + 2b_0m.fi,2b_0mf.i,2b_0mfi.,\ + 2b_0m.mf,2b_0mm.f,2b_0mmf.,2b_0b.bb,2b_0bb.b,2b_0bbb.,\ + 2b_0m.bb,2b_0mb.b,2b_0mbb.,\ + 2b_0m.ib,2b_0mi.b,2b_0mib.,2b_0m.mb,2b_0mm.b,2b_0mmb.,\ + 2b_0m.fb,2b_0mf.b,2b_0mfb.,2b_0m.lx,2b_0mlx., \ + 2b_1m.ii,2b_1mi.i,2b_1mii.,2b_1m.mi,2b_1mm.i,2b_1mmi.,\ + 2b_1m.fi,2b_1mf.i,2b_1mfi.,\ + 2b_1m.mf,2b_1mm.f,2b_1mmf.,2b_1b.bb,2b_1bb.b,2b_1bbb.,\ + 2b_1m.bb,2b_1mb.b,2b_1mbb.,\ + 2b_1m.ib,2b_1mi.b,2b_1mib.,2b_1m.mb,2b_1mm.b,2b_1mmb.,\ + 2b_1m.fb,2b_1mf.b,2b_1mfb.,2b_1m.lx,2b_1mlx." + "2b_stop") + +;; The issue logic can reorder M slot insns between different subtypes +;; but cannot reorder insns within the same subtype. The following +;; constraint is enough to describe this. +;; (2b_um1 needs 2b_um0 reserved, and 2b_um3 needs 2b_um2 reserved.) +(final_presence_set "2b_um1" "2b_um0") +(final_presence_set "2b_um3" "2b_um2") + +;; The insn in the 1st I slot of the two bundle issue group will issue +;; to I0. The second I slot insn will issue to I1. 
+(final_presence_set "2b_ui1" "2b_ui0") + +;; For exceptions of I insns: +(define_cpu_unit "2b_only_ui0" "twob") +(final_absence_set "2b_only_ui0" "2b_ui1") + +;; Insns + +(define_reservation "2b_M" + "((2b_0m.ii|2b_0m.mi|2b_0m.fi|2b_0m.mf|2b_0m.bb\ + |2b_0m.ib|2b_0m.mb|2b_0m.fb|2b_0m.lx)+2_1\ + |(2b_1m.ii|2b_1m.mi|2b_1m.fi|2b_1m.mf|2b_1m.bb\ + |2b_1m.ib|2b_1m.mb|2b_1m.fb|2b_1m.lx)+2_4\ + |(2b_0mm.i|2b_0mm.f|2b_0mm.b)+2_2\ + |(2b_1mm.i|2b_1mm.f|2b_1mm.b)+2_5)\ + +(2b_um0|2b_um1|2b_um2|2b_um3)") + +(define_reservation "2b_M_only_um0" + "((2b_0m.ii|2b_0m.mi|2b_0m.fi|2b_0m.mf|2b_0m.bb\ + |2b_0m.ib|2b_0m.mb|2b_0m.fb|2b_0m.lx)+2_1\ + |(2b_1m.ii|2b_1m.mi|2b_1m.fi|2b_1m.mf|2b_1m.bb\ + |2b_1m.ib|2b_1m.mb|2b_1m.fb|2b_1m.lx)+2_4\ + |(2b_0mm.i|2b_0mm.f|2b_0mm.b)+2_2\ + |(2b_1mm.i|2b_1mm.f|2b_1mm.b)+2_5)\ + +2b_um0") + +(define_reservation "2b_M_only_um2" + "((2b_0m.ii|2b_0m.mi|2b_0m.fi|2b_0m.mf|2b_0m.bb\ + |2b_0m.ib|2b_0m.mb|2b_0m.fb|2b_0m.lx)+2_1\ + |(2b_1m.ii|2b_1m.mi|2b_1m.fi|2b_1m.mf|2b_1m.bb\ + |2b_1m.ib|2b_1m.mb|2b_1m.fb|2b_1m.lx)+2_4\ + |(2b_0mm.i|2b_0mm.f|2b_0mm.b)+2_2\ + |(2b_1mm.i|2b_1mm.f|2b_1mm.b)+2_5)\ + +2b_um2") + +(define_reservation "2b_M_only_um01" + "((2b_0m.ii|2b_0m.mi|2b_0m.fi|2b_0m.mf|2b_0m.bb\ + |2b_0m.ib|2b_0m.mb|2b_0m.fb|2b_0m.lx)+2_1\ + |(2b_1m.ii|2b_1m.mi|2b_1m.fi|2b_1m.mf|2b_1m.bb\ + |2b_1m.ib|2b_1m.mb|2b_1m.fb|2b_1m.lx)+2_4\ + |(2b_0mm.i|2b_0mm.f|2b_0mm.b)+2_2\ + |(2b_1mm.i|2b_1mm.f|2b_1mm.b)+2_5)\ + +(2b_um0|2b_um1)") + +(define_reservation "2b_M_only_um23" + "((2b_0m.ii|2b_0m.mi|2b_0m.fi|2b_0m.mf|2b_0m.bb\ + |2b_0m.ib|2b_0m.mb|2b_0m.fb|2b_0m.lx)+2_1\ + |(2b_1m.ii|2b_1m.mi|2b_1m.fi|2b_1m.mf|2b_1m.bb\ + |2b_1m.ib|2b_1m.mb|2b_1m.fb|2b_1m.lx)+2_4\ + |(2b_0mm.i|2b_0mm.f|2b_0mm.b)+2_2\ + |(2b_1mm.i|2b_1mm.f|2b_1mm.b)+2_5)\ + +(2b_um2|2b_um3)") + +;; I instruction is dispersed to the lowest numbered I unit +;; not already in use. Remeber about possible spliting. 
+(define_reservation "2b_I" + "2b_0mi.i+2_2+2b_ui0|2b_0mii.+2_3+(2b_ui0|2b_ui1)|2b_0mmi.+2_3+2b_ui0\ + |2b_0mfi.+2_3+2b_ui0|2b_0mi.b+2_2+2b_ui0\ + |(2b_1mi.i+2_5|2b_1mi.b+2_5)+(2b_ui0|2b_ui1)\ + |(2b_1mii.|2b_1mmi.|2b_1mfi.)+2_6+(2b_ui0|2b_ui1)") + +;; "An F slot in the 1st bundle disperses to F0". +;; "An F slot in the 2st bundle disperses to F1". +(define_reservation "2b_F" + "2b_0mf.i+2_2+2b_uf0|2b_0mmf.+2_3+2b_uf0|2b_0mf.b+2_2+2b_uf0\ + |2b_1mf.i+2_5+2b_uf1|2b_1mmf.+2_6+2b_uf1|2b_1mf.b+2_5+2b_uf1") + +;;; "Each B slot in MBB or BBB bundle disperses to the corresponding B +;;; unit. That is, a B slot in 1st position is despersed to B0. In the +;;; 2nd position it is dispersed to B2". +(define_reservation "2b_NB" + "2b_0b.bb+2_1+2b_unb0|2b_0bb.b+2_2+2b_unb1|2b_0bbb.+2_3+2b_unb2\ + |2b_0mb.b+2_2+2b_unb1|2b_0mbb.+2_3+2b_unb2\ + |2b_0mib.+2_3+2b_unb0|2b_0mmb.+2_3+2b_unb0|2b_0mfb.+2_3+2b_unb0\ + |2b_1b.bb+2_4+2b_unb0|2b_1bb.b+2_5+2b_unb1\ + |2b_1bbb.+2_6+2b_unb2|2b_1mb.b+2_5+2b_unb1|2b_1mbb.+2_6+2b_unb2\ + |2b_1mib.+2_6+2b_unb0|2b_1mmb.+2_6+2b_unb0|2b_1mfb.+2_6+2b_unb0") + +(define_reservation "2b_B" + "2b_0b.bb+2_1+2b_ub0|2b_0bb.b+2_2+2b_ub1|2b_0bbb.+2_3+2b_ub2\ + |2b_0mb.b+2_2+2b_ub1|2b_0mbb.+2_3+2b_ub2|2b_0mib.+2_3+2b_ub2\ + |2b_0mfb.+2_3+2b_ub2|2b_1b.bb+2_4+2b_ub0|2b_1bb.b+2_5+2b_ub1\ + |2b_1bbb.+2_6+2b_ub2|2b_1mb.b+2_5+2b_ub1\ + |2b_1mib.+2_6+2b_ub2|2b_1mmb.+2_6+2b_ub2|2b_1mfb.+2_6+2b_ub2") + +;; For the MLI template, the I slot insn is always assigned to port I0 +;; if it is in the first bundle or it is assigned to port I1 if it is in +;; the second bundle. +(define_reservation "2b_L" + "2b_0mlx.+2_3+2b_ui0+2b_uf0|2b_1mlx.+2_6+2b_ui1+2b_uf1") + +;; Should we describe that A insn in I slot can be issued into M +;; ports? I think it is not necessary because of multipass +;; scheduling. 
For example, the multipass scheduling could use +;; MMI-MMI instead of MII-MII where the two last I slots contain A +;; insns (even if the case is complicated by use-def conflicts). +;; +;; In any case we could describe it as +;; (define_cpu_unit "2b_ui1_0pres,2b_ui1_1pres,2b_ui1_2pres,2b_ui1_3pres" +;; "twob") +;; (final_presence_set "2b_ui1_0pres,2b_ui1_1pres,2b_ui1_2pres,2b_ui1_3pres" +;; "2b_ui1") +;; (define_reservation "b_A" +;; "b_M|b_I\ +;; |(2b_1mi.i+2_5|2b_1mii.+2_6|2b_1mmi.+2_6|2b_1mfi.+2_6|2b_1mi.b+2_5)\ +;; +(2b_um0|2b_um1|2b_um2|2b_um3)\ +;; +(2b_ui1_0pres|2b_ui1_1pres|2b_ui1_2pres|2b_ui1_3pres)") + +(define_reservation "2b_A" "2b_M|2b_I") + +;; We assume that there is no insn issued on the same cycle as the +;; unknown insn. +(define_cpu_unit "2b_empty" "twob") +(exclusion_set "2b_empty" + "2b_0m.ii,2b_0m.mi,2b_0m.fi,2b_0m.mf,2b_0b.bb,2b_0m.bb,\ + 2b_0m.ib,2b_0m.mb,2b_0m.fb,2b_0m.lx,2b_0mm.i") + +(define_cpu_unit + "2b_0m_bs, 2b_0mi_bs, 2b_0mm_bs, 2b_0mf_bs, 2b_0b_bs, 2b_0bb_bs, 2b_0mb_bs" + "twob") +(define_cpu_unit + "2b_1m_bs, 2b_1mi_bs, 2b_1mm_bs, 2b_1mf_bs, 2b_1b_bs, 2b_1bb_bs, 2b_1mb_bs" + "twob") + +(define_cpu_unit "2b_m_cont, 2b_mi_cont, 2b_mm_cont, 2b_mf_cont, 2b_mb_cont,\ + 2b_b_cont, 2b_bb_cont" "twob") + +;; For stop in the middle of the bundles. 
+(define_cpu_unit "2b_m_stop, 2b_m0_stop, 2b_m1_stop, 2b_0mmi_cont" "twob") +(define_cpu_unit "2b_mi_stop, 2b_mi0_stop, 2b_mi1_stop, 2b_0mii_cont" "twob") + +(final_presence_set "2b_0m_bs" + "2b_0m.ii, 2b_0m.mi, 2b_0m.mf, 2b_0m.fi, 2b_0m.bb,\ + 2b_0m.ib, 2b_0m.fb, 2b_0m.mb, 2b_0m.lx") +(final_presence_set "2b_1m_bs" + "2b_1m.ii, 2b_1m.mi, 2b_1m.mf, 2b_1m.fi, 2b_1m.bb,\ + 2b_1m.ib, 2b_1m.fb, 2b_1m.mb, 2b_1m.lx") +(final_presence_set "2b_0mi_bs" "2b_0mi.i, 2b_0mi.i") +(final_presence_set "2b_1mi_bs" "2b_1mi.i, 2b_1mi.i") +(final_presence_set "2b_0mm_bs" "2b_0mm.i, 2b_0mm.f, 2b_0mm.b") +(final_presence_set "2b_1mm_bs" "2b_1mm.i, 2b_1mm.f, 2b_1mm.b") +(final_presence_set "2b_0mf_bs" "2b_0mf.i, 2b_0mf.b") +(final_presence_set "2b_1mf_bs" "2b_1mf.i, 2b_1mf.b") +(final_presence_set "2b_0b_bs" "2b_0b.bb") +(final_presence_set "2b_1b_bs" "2b_1b.bb") +(final_presence_set "2b_0bb_bs" "2b_0bb.b") +(final_presence_set "2b_1bb_bs" "2b_1bb.b") +(final_presence_set "2b_0mb_bs" "2b_0mb.b") +(final_presence_set "2b_1mb_bs" "2b_1mb.b") + +(exclusion_set "2b_0m_bs" + "2b_0mi.i, 2b_0mm.i, 2b_0mm.f, 2b_0mf.i, 2b_0mb.b,\ + 2b_0mi.b, 2b_0mf.b, 2b_0mm.b, 2b_0mlx., 2b_m0_stop") +(exclusion_set "2b_1m_bs" + "2b_1mi.i, 2b_1mm.i, 2b_1mm.f, 2b_1mf.i, 2b_1mb.b,\ + 2b_1mi.b, 2b_1mf.b, 2b_1mm.b, 2b_1mlx., 2b_m1_stop") +(exclusion_set "2b_0mi_bs" "2b_0mii., 2b_0mib., 2b_mi0_stop") +(exclusion_set "2b_1mi_bs" "2b_1mii., 2b_1mib., 2b_mi1_stop") +(exclusion_set "2b_0mm_bs" "2b_0mmi., 2b_0mmf., 2b_0mmb.") +(exclusion_set "2b_1mm_bs" "2b_1mmi., 2b_1mmf., 2b_1mmb.") +(exclusion_set "2b_0mf_bs" "2b_0mfi., 2b_0mfb.") +(exclusion_set "2b_1mf_bs" "2b_1mfi., 2b_1mfb.") +(exclusion_set "2b_0b_bs" "2b_0bb.b") +(exclusion_set "2b_1b_bs" "2b_1bb.b") +(exclusion_set "2b_0bb_bs" "2b_0bbb.") +(exclusion_set "2b_1bb_bs" "2b_1bbb.") +(exclusion_set "2b_0mb_bs" "2b_0mbb.") +(exclusion_set "2b_1mb_bs" "2b_1mbb.") + +(exclusion_set + "2b_0m_bs, 2b_0mi_bs, 2b_0mm_bs, 2b_0mf_bs, 2b_0b_bs, 2b_0bb_bs, 2b_0mb_bs, + 2b_1m_bs, 
2b_1mi_bs, 2b_1mm_bs, 2b_1mf_bs, 2b_1b_bs, 2b_1bb_bs, 2b_1mb_bs" + "2b_stop") + +(final_presence_set + "2b_0mi.i, 2b_0mm.i, 2b_0mf.i, 2b_0mm.f, 2b_0mb.b,\ + 2b_0mi.b, 2b_0mm.b, 2b_0mf.b, 2b_0mlx." + "2b_m_cont") +(final_presence_set "2b_0mii., 2b_0mib." "2b_mi_cont") +(final_presence_set "2b_0mmi., 2b_0mmf., 2b_0mmb." "2b_mm_cont") +(final_presence_set "2b_0mfi., 2b_0mfb." "2b_mf_cont") +(final_presence_set "2b_0bb.b" "2b_b_cont") +(final_presence_set "2b_0bbb." "2b_bb_cont") +(final_presence_set "2b_0mbb." "2b_mb_cont") + +(exclusion_set + "2b_0m.ii, 2b_0m.mi, 2b_0m.fi, 2b_0m.mf, 2b_0b.bb, 2b_0m.bb,\ + 2b_0m.ib, 2b_0m.mb, 2b_0m.fb, 2b_0m.lx" + "2b_m_cont, 2b_mi_cont, 2b_mm_cont, 2b_mf_cont,\ + 2b_mb_cont, 2b_b_cont, 2b_bb_cont") + +(exclusion_set "2b_empty" + "2b_m_cont,2b_mi_cont,2b_mm_cont,2b_mf_cont,\ + 2b_mb_cont,2b_b_cont,2b_bb_cont") + +;; For m;mi bundle +(final_presence_set "2b_m0_stop" "2b_0m.mi") +(final_presence_set "2b_0mm.i" "2b_0mmi_cont") +(exclusion_set "2b_0mmi_cont" + "2b_0m.ii, 2b_0m.mi, 2b_0m.fi, 2b_0m.mf, 2b_0b.bb, 2b_0m.bb,\ + 2b_0m.ib, 2b_0m.mb, 2b_0m.fb, 2b_0m.lx") +(exclusion_set "2b_m0_stop" "2b_0mm.i") +(final_presence_set "2b_m1_stop" "2b_1m.mi") +(exclusion_set "2b_m1_stop" "2b_1mm.i") +(final_presence_set "2b_m_stop" "2b_m0_stop, 2b_m1_stop") + +;; For mi;i bundle +(final_presence_set "2b_mi0_stop" "2b_0mi.i") +(final_presence_set "2b_0mii." 
"2b_0mii_cont") +(exclusion_set "2b_0mii_cont" + "2b_0m.ii, 2b_0m.mi, 2b_0m.fi, 2b_0m.mf, 2b_0b.bb, 2b_0m.bb,\ + 2b_0m.ib, 2b_0m.mb, 2b_0m.fb, 2b_0m.lx") +(exclusion_set "2b_mi0_stop" "2b_0mii.") +(final_presence_set "2b_mi1_stop" "2b_1mi.i") +(exclusion_set "2b_mi1_stop" "2b_1mii.") +(final_presence_set "2b_mi_stop" "2b_mi0_stop, 2b_mi1_stop") + +(final_absence_set + "2b_0m.ii,2b_0mi.i,2b_0mii.,2b_0m.mi,2b_0mm.i,2b_0mmi.,\ + 2b_0m.fi,2b_0mf.i,2b_0mfi.,2b_0m.mf,2b_0mm.f,2b_0mmf.,\ + 2b_0b.bb,2b_0bb.b,2b_0bbb.,2b_0m.bb,2b_0mb.b,2b_0mbb.,\ + 2b_0m.ib,2b_0mi.b,2b_0mib.,2b_0m.mb,2b_0mm.b,2b_0mmb.,\ + 2b_0m.fb,2b_0mf.b,2b_0mfb.,2b_0m.lx,2b_0mlx., \ + 2b_1m.ii,2b_1mi.i,2b_1mii.,2b_1m.mi,2b_1mm.i,2b_1mmi.,\ + 2b_1m.fi,2b_1mf.i,2b_1mfi.,2b_1m.mf,2b_1mm.f,2b_1mmf.,\ + 2b_1b.bb,2b_1bb.b,2b_1bbb.,2b_1m.bb,2b_1mb.b,2b_1mbb.,\ + 2b_1m.ib,2b_1mi.b,2b_1mib.,2b_1m.mb,2b_1mm.b,2b_1mmb.,\ + 2b_1m.fb,2b_1mf.b,2b_1mfb.,2b_1m.lx,2b_1mlx." + "2b_m0_stop,2b_m1_stop,2b_mi0_stop,2b_mi1_stop") + +(define_insn_reservation "2b_stop_bit" 0 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "stop_bit")) + (ne (symbol_ref "bundling_p") (const_int 0))) + "2b_stop|2b_m0_stop|2b_m1_stop|2b_mi0_stop|2b_mi1_stop") +(define_insn_reservation "2b_br" 0 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "br")) + (ne (symbol_ref "bundling_p") (const_int 0))) "2b_B") +(define_insn_reservation "2b_scall" 0 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "scall")) + (ne (symbol_ref "bundling_p") (const_int 0))) "2b_B") +(define_insn_reservation "2b_fcmp" 2 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "fcmp")) + (ne (symbol_ref "bundling_p") (const_int 0))) "2b_F") +(define_insn_reservation "2b_fcvtfx" 4 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "fcvtfx")) + (ne (symbol_ref "bundling_p") (const_int 0))) "2b_F") +(define_insn_reservation "2b_fld" 6 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" 
"fld")) + (ne (symbol_ref "bundling_p") (const_int 0))) "2b_M") +(define_insn_reservation "2b_fmac" 4 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "fmac")) + (ne (symbol_ref "bundling_p") (const_int 0))) "2b_F") +(define_insn_reservation "2b_fmisc" 4 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "fmisc")) + (ne (symbol_ref "bundling_p") (const_int 0))) "2b_F") + +;; Latency time ??? +(define_insn_reservation "2b_frar_i" 13 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "frar_i")) + (ne (symbol_ref "bundling_p") (const_int 0))) + "2b_I+2b_only_ui0") +;; Latency time ??? +(define_insn_reservation "2b_frar_m" 6 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "frar_m")) + (ne (symbol_ref "bundling_p") (const_int 0))) + "2b_M_only_um2") +(define_insn_reservation "2b_frbr" 2 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "frbr")) + (ne (symbol_ref "bundling_p") (const_int 0))) + "2b_I+2b_only_ui0") +(define_insn_reservation "2b_frfr" 5 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "frfr")) + (ne (symbol_ref "bundling_p") (const_int 0))) + "2b_M_only_um2") +(define_insn_reservation "2b_frpr" 2 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "frpr")) + (ne (symbol_ref "bundling_p") (const_int 0))) + "2b_I+2b_only_ui0") + +(define_insn_reservation "2b_ialu" 1 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "ialu")) + (ne (symbol_ref "bundling_p") (const_int 0))) + "2b_A") +(define_insn_reservation "2b_icmp" 1 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "icmp")) + (ne (symbol_ref "bundling_p") (const_int 0))) "2b_A") +(define_insn_reservation "2b_ilog" 1 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "ilog")) + (ne (symbol_ref "bundling_p") (const_int 0))) "2b_A") +;; Latency time ??? 
+(define_insn_reservation "2b_ishf" 1 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "ishf")) + (ne (symbol_ref "bundling_p") (const_int 0))) + "2b_I+2b_only_ui0") +(define_insn_reservation "2b_ld" 1 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "ld")) + (ne (symbol_ref "bundling_p") (const_int 0))) + "2b_M_only_um01") +(define_insn_reservation "2b_long_i" 1 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "long_i")) + (ne (symbol_ref "bundling_p") (const_int 0))) "2b_L") + +;; Latency time ??? +(define_insn_reservation "2b_mmmul" 2 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "mmmul")) + (ne (symbol_ref "bundling_p") (const_int 0))) + "2b_I+2b_only_ui0") +;; Latency time ??? +(define_insn_reservation "2b_mmshf" 2 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "mmshf")) + (ne (symbol_ref "bundling_p") (const_int 0))) "2b_I") +;; Latency time ??? +(define_insn_reservation "2b_mmshfi" 1 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "mmshfi")) + (ne (symbol_ref "bundling_p") (const_int 0))) "2b_I") + +(define_insn_reservation "2b_rse_m" 0 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "rse_m")) + (ne (symbol_ref "bundling_p") (const_int 0))) + "(2b_0m.ii|2b_0m.mi|2b_0m.fi|2b_0m.mf|2b_0m.bb\ + |2b_0m.ib|2b_0m.mb|2b_0m.fb|2b_0m.lx)+2_1+2b_um0") +(define_insn_reservation "2b_sem" 0 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "sem")) + (ne (symbol_ref "bundling_p") (const_int 0))) + "2b_M_only_um23") + +(define_insn_reservation "2b_stf" 1 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "stf")) + (ne (symbol_ref "bundling_p") (const_int 0))) + "2b_M_only_um23") +(define_insn_reservation "2b_st" 1 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "st")) + (ne (symbol_ref "bundling_p") (const_int 0))) + "2b_M_only_um23") +(define_insn_reservation "2b_syst_m0" 0 + (and (and (eq_attr "cpu" 
"itanium2") + (eq_attr "itanium_class" "syst_m0")) + (ne (symbol_ref "bundling_p") (const_int 0))) + "2b_M_only_um2") +(define_insn_reservation "2b_syst_m" 0 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "syst_m")) + (ne (symbol_ref "bundling_p") (const_int 0))) + "2b_M_only_um0") +;; Reservation??? +(define_insn_reservation "2b_tbit" 1 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "tbit")) + (ne (symbol_ref "bundling_p") (const_int 0))) + "2b_I+2b_only_ui0") +(define_insn_reservation "2b_toar_i" 0 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "toar_i")) + (ne (symbol_ref "bundling_p") (const_int 0))) + "2b_I+2b_only_ui0") +;; Latency time ??? +(define_insn_reservation "2b_toar_m" 5 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "toar_m")) + (ne (symbol_ref "bundling_p") (const_int 0))) + "2b_M_only_um2") +;; Latency time ??? +(define_insn_reservation "2b_tobr" 1 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "tobr")) + (ne (symbol_ref "bundling_p") (const_int 0))) + "2b_I+2b_only_ui0") +(define_insn_reservation "2b_tofr" 5 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "tofr")) + (ne (symbol_ref "bundling_p") (const_int 0))) + "2b_M_only_um23") +;; Latency time ??? +(define_insn_reservation "2b_topr" 1 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "topr")) + (ne (symbol_ref "bundling_p") (const_int 0))) + "2b_I+2b_only_ui0") + +(define_insn_reservation "2b_xmpy" 4 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "xmpy")) + (ne (symbol_ref "bundling_p") (const_int 0))) "2b_F") +;; Latency time ??? 
+(define_insn_reservation "2b_xtd" 1 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "xtd")) + (ne (symbol_ref "bundling_p") (const_int 0))) "2b_I") +(define_insn_reservation "2b_chk_s" 0 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "chk_s")) + (ne (symbol_ref "bundling_p") (const_int 0))) + "2b_I|2b_M_only_um23") +(define_insn_reservation "2b_lfetch" 0 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "lfetch")) + (ne (symbol_ref "bundling_p") (const_int 0))) + "2b_M_only_um01") +(define_insn_reservation "2b_nop_m" 0 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "nop_m")) + (ne (symbol_ref "bundling_p") (const_int 0))) "2b_M") +(define_insn_reservation "2b_nop_b" 0 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "nop_b")) + (ne (symbol_ref "bundling_p") (const_int 0))) "2b_NB") +(define_insn_reservation "2b_nop_i" 0 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "nop_i")) + (ne (symbol_ref "bundling_p") (const_int 0))) "2b_I") +(define_insn_reservation "2b_nop_f" 0 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "nop_f")) + (ne (symbol_ref "bundling_p") (const_int 0))) "2b_F") +(define_insn_reservation "2b_nop_x" 0 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "nop_x")) + (ne (symbol_ref "bundling_p") (const_int 0))) "2b_L") +(define_insn_reservation "2b_unknown" 1 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "unknown")) + (ne (symbol_ref "bundling_p") (const_int 0))) "2b_empty") +(define_insn_reservation "2b_nop" 0 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "nop")) + (ne (symbol_ref "bundling_p") (const_int 0))) + "2b_M|2b_NB|2b_I|2b_F") +(define_insn_reservation "2b_ignore" 0 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "ignore")) + (ne (symbol_ref "bundling_p") (const_int 0))) "nothing") + +(define_insn_reservation "2b_pre_cycle" 0 + (and (and 
(eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "pre_cycle")) + (ne (symbol_ref "bundling_p") (const_int 0))) + "(2b_0m_bs, 2b_m_cont) \ + | (2b_0mi_bs, 2b_mi_cont) \ + | (2b_0mm_bs, 2b_mm_cont) \ + | (2b_0mf_bs, 2b_mf_cont) \ + | (2b_0b_bs, 2b_b_cont) \ + | (2b_0bb_bs, 2b_bb_cont) \ + | (2b_0mb_bs, 2b_mb_cont) \ + | (2b_1m_bs, 2b_m_cont) \ + | (2b_1mi_bs, 2b_mi_cont) \ + | (2b_1mm_bs, 2b_mm_cont) \ + | (2b_1mf_bs, 2b_mf_cont) \ + | (2b_1b_bs, 2b_b_cont) \ + | (2b_1bb_bs, 2b_bb_cont) \ + | (2b_1mb_bs, 2b_mb_cont) \ + | (2b_m_stop, 2b_0mmi_cont) \ + | (2b_mi_stop, 2b_0mii_cont)") + diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 2adb009c46a..8b3e365a550 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -9519,6 +9519,14 @@ A fixed register is one that the register allocator can not use. This is useful when compiling kernel code. A register range is specified as two registers separated by a dash. Multiple register ranges can be specified separated by a comma. + +@item -mearly-stop-bits +@itemx -mno-early-stop-bits +@opindex mearly-stop-bits +@opindex mno-early-stop-bits +Allow stop bits to be placed earlier than immediately preceding the +instruction that triggered the stop bit. This can improve instruction +scheduling, but does not always do so. @end table @node D30V Options diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi index 2d90de4537d..5b91c082680 100644 --- a/gcc/doc/md.texi +++ b/gcc/doc/md.texi @@ -5586,16 +5586,23 @@ which the unit is bound. The automaton should be described in construction @code{define_automaton}. You should give @dfn{automaton-name}, if there is a defined automaton. +The assignment of units to automata are constrained by the uses of the +units in insn reservations. 
The most important constraint is: if a +unit reservation is present on a particular cycle of an alternative +for an insn reservation, then some unit from the same automaton must +be present on the same cycle for the other alternatives of the insn +reservation. The rest of the constraints are mentioned in the +description of the subsequent constructions. + @findex define_query_cpu_unit @cindex querying function unit reservations The following construction describes CPU functional units analogously -to @code{define_cpu_unit}. If we use automata without their -minimization, the reservation of such units can be queried for an -automaton state. The instruction scheduler never queries reservation -of functional units for given automaton state. So as a rule, you -don't need this construction. This construction could be used for -future code generation goals (e.g. to generate @acronym{VLIW} insn -templates). +to @code{define_cpu_unit}. The reservation of such units can be +queried for an automaton state. The instruction scheduler never +queries reservation of functional units for given automaton state. So +as a rule, you don't need this construction. This construction could +be used for future code generation goals (e.g. to generate +@acronym{VLIW} insn templates). @smallexample (define_query_cpu_unit @var{unit-names} [@var{automaton-name}]) @@ -5744,7 +5751,9 @@ of insn @samp{store} (not a stored value). @findex exclusion_set @findex presence_set +@findex final_presence_set @findex absence_set +@findex final_absence_set @cindex VLIW @cindex RISC Usually the following three constructions are used to describe @@ -5754,13 +5763,19 @@ used for @acronym{RISC} processors too. 
@smallexample (exclusion_set @var{unit-names} @var{unit-names}) -(presence_set @var{unit-names} @var{unit-names}) -(absence_set @var{unit-names} @var{unit-names}) +(presence_set @var{unit-names} @var{patterns}) +(final_presence_set @var{unit-names} @var{patterns}) +(absence_set @var{unit-names} @var{patterns}) +(final_absence_set @var{unit-names} @var{patterns}) @end smallexample @var{unit-names} is a string giving names of functional units separated by commas. +@var{patterns} is a string giving patterns of functional units +separated by commas. Currently a pattern is one unit or units +separated by white-spaces. + The first construction (@samp{exclusion_set}) means that each functional unit in the first string can not be reserved simultaneously with a unit whose name is in the second string and vice versa. For @@ -5771,22 +5786,75 @@ point insns or only double floating point insns. The second construction (@samp{presence_set}) means that each functional unit in the first string can not be reserved unless at -least one of units whose names are in the second string is reserved. -This is an asymmetric relation. For example, it is useful for -description that @acronym{VLIW} @samp{slot1} is reserved after -@samp{slot0} reservation. - -The third construction (@samp{absence_set}) means that each functional -unit in the first string can be reserved only if each unit whose name -is in the second string is not reserved. This is an asymmetric -relation (actually @samp{exclusion_set} is analogous to this one but -it is symmetric). For example, it is useful for description that -@acronym{VLIW} @samp{slot0} can not be reserved after @samp{slot1} or -@samp{slot2} reservation. +least one of the patterns of units whose names are in the second string is +reserved. This is an asymmetric relation. For example, it is useful +for description that @acronym{VLIW} @samp{slot1} is reserved after +@samp{slot0} reservation.
We could describe it by the following +construction + +@smallexample +(presence_set "slot1" "slot0") +@end smallexample + +Or @samp{slot1} is reserved only after @samp{slot0} and unit @samp{b0} +reservation. In this case we could write + +@smallexample +(presence_set "slot1" "slot0 b0") +@end smallexample + +The third construction (@samp{final_presence_set}) is analogous to +@samp{presence_set}. The difference between them is when checking is +done. When an instruction is issued in given automaton state +reflecting all current and planned unit reservations, the automaton +state is changed. The first state is a source state, the second one +is a result state. Checking for @samp{presence_set} is done on the +source state reservation, checking for @samp{final_presence_set} is +done on the result reservation. This construction is useful to +describe a reservation which is actually two subsequent reservations. +For example, if we use + +@smallexample +(presence_set "slot1" "slot0") +@end smallexample + +the following insn will never be issued (because @samp{slot1} requires +@samp{slot0} which is absent in the source state). + +@smallexample +(define_reservation "insn_and_nop" "slot0 + slot1") +@end smallexample + +but it can be issued if we use analogous @samp{final_presence_set}. + +The fourth construction (@samp{absence_set}) means that each functional +unit in the first string can be reserved only if each pattern of units +whose names are in the second string is not reserved. This is an +asymmetric relation (actually @samp{exclusion_set} is analogous to +this one but it is symmetric). For example, it is useful for +description that @acronym{VLIW} @samp{slot0} can not be reserved after +@samp{slot1} or @samp{slot2} reservation.
We could describe it by the +following construction + +@smallexample +(absence_set "slot2" "slot0, slot1") +@end smallexample + +Or @samp{slot2} can not be reserved if @samp{slot0} and unit @samp{b0} +are reserved or @samp{slot1} and unit @samp{b1} are reserved. In +this case we could write + +@smallexample +(absence_set "slot2" "slot0 b0, slot1 b1") +@end smallexample All functional units mentioned in a set should belong to the same automaton. +The last construction (@samp{final_absence_set}) is analogous to +@samp{absence_set} but checking is done on the result (state) +reservation. See comments for @samp{final_presence_set}. + @findex automata_option @cindex deterministic finite state automaton @cindex nondeterministic finite state automaton @@ -5804,8 +5872,8 @@ code. Currently there are the following options: @itemize @bullet @item @dfn{no-minimization} makes no minimization of the automaton. This is -only worth to do when we are going to query CPU functional unit -reservations in an automaton state. +only worth to do when we are debugging the description and need to +look more accurately at reservations of states. @item @dfn{time} means printing additional time statistics about diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi index 53ba81288d0..45f38f0f921 100644 --- a/gcc/doc/tm.texi +++ b/gcc/doc/tm.texi @@ -5629,6 +5629,16 @@ scheduling one insn causes other insns to become ready in the same cycle. These other insns can then be taken into account properly. @end deftypefn +@deftypefn {Target Hook} void TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK (rtx @var{head}, rtx @var{tail}) +This hook is called after evaluation forward dependencies of insns in +chain given by two parameter values (@var{head} and @var{tail} +correspondingly) but before insns scheduling of the insn chain. For +example, it can be used for better insn classification if it requires +analysis of dependencies. 
This hook can use backward and forward +dependencies of the insn scheduler because they are already +calculated. +@end deftypefn + @deftypefn {Target Hook} void TARGET_SCHED_INIT (FILE *@var{file}, int @var{verbose}, int @var{max_ready}) This hook is executed by the scheduler at the beginning of each block of instructions that are to be scheduled. @var{file} is either a null @@ -5715,6 +5725,30 @@ schedules to choose the best one. The default is no multipass scheduling. @end deftypefn +@deftypefn {Target Hook} int TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD (rtx) + +This hook controls what insns from the ready insn queue will be +considered for the multipass insn scheduling. If the hook returns +zero for insn passed as the parameter, the insn will not be chosen to +be issued. + +The default is that any ready insns can be chosen to be issued. +@end deftypefn + +@deftypefn {Target Hook} int TARGET_SCHED_DFA_NEW_CYCLE (FILE *, int, rtx, int, int, int *) + +This hook is called by the insn scheduler before issuing insn passed +as the third parameter on given cycle. If the hook returns nonzero, +the insn is not issued on given processors cycle. Instead of that, +the processor cycle is advanced. If the value passed through the last +parameter is zero, the insn ready queue is not sorted on the new cycle +start as usual. The first parameter passes file for debugging +output. The second one passes the scheduler verbose level of the +debugging output. The fourth and the fifth parameter values are +correspondingly processor cycle on which the previous insn has been +issued and the current processor cycle. +@end deftypefn + @deftypefn {Target Hook} void TARGET_SCHED_INIT_DFA_BUBBLES (void) The @acronym{DFA}-based scheduler could take the insertion of nop operations for better insn scheduling into account.
It can be done diff --git a/gcc/genattr.c b/gcc/genattr.c index 02c5c345d51..1e910edb353 100644 --- a/gcc/genattr.c +++ b/gcc/genattr.c @@ -441,6 +441,11 @@ main (argc, argv) printf (" unit with given code is currently reserved in given\n"); printf (" DFA state. */\n"); printf ("extern int cpu_unit_reservation_p PARAMS ((state_t, int));\n"); + printf ("/* Clean insn code cache. It should be called if there\n"); + printf (" is a chance that condition value in a\n"); + printf (" define_insn_reservation will be changed after\n"); + printf (" last call of dfa_start. */\n"); + printf ("extern void dfa_clean_insn_cache PARAMS ((void));\n\n"); printf ("#endif\n\n"); printf ("/* Initiate and finish work with DFA. They should be\n"); printf (" called as the first and the last interface\n"); diff --git a/gcc/genattrtab.c b/gcc/genattrtab.c index 266fc41b782..53ebbefd3ae 100644 --- a/gcc/genattrtab.c +++ b/gcc/genattrtab.c @@ -6115,10 +6115,18 @@ from the machine description file `md'. */\n\n"); gen_presence_set (desc); break; + case FINAL_PRESENCE_SET: + gen_final_presence_set (desc); + break; + case ABSENCE_SET: gen_absence_set (desc); break; + case FINAL_ABSENCE_SET: + gen_final_absence_set (desc); + break; + case DEFINE_AUTOMATON: gen_automaton (desc); break; diff --git a/gcc/genattrtab.h b/gcc/genattrtab.h index ea1f23991ea..8d0d35ecf8d 100644 --- a/gcc/genattrtab.h +++ b/gcc/genattrtab.h @@ -33,7 +33,9 @@ extern void gen_query_cpu_unit PARAMS ((rtx)); extern void gen_bypass PARAMS ((rtx)); extern void gen_excl_set PARAMS ((rtx)); extern void gen_presence_set PARAMS ((rtx)); +extern void gen_final_presence_set PARAMS ((rtx)); extern void gen_absence_set PARAMS ((rtx)); +extern void gen_final_absence_set PARAMS ((rtx)); extern void gen_automaton PARAMS ((rtx)); extern void gen_automata_option PARAMS ((rtx)); extern void gen_reserv PARAMS ((rtx)); diff --git a/gcc/genautomata.c b/gcc/genautomata.c index b64f6f5fbcf..308c189dfbe 100644 --- a/gcc/genautomata.c +++ 
b/gcc/genautomata.c @@ -48,7 +48,8 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA automaton state. 4. Several constructions to describe impossible reservations - (`exclusion_set', `presence_set', and `absence_set'). + (`exclusion_set', `presence_set', `final_presence_set', + `absence_set', and `final_absence_set'). 5. No reverse automata are generated. Trace instruction scheduling requires this. It can be easily added in the future if we @@ -57,8 +58,7 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA 6. Union of automaton states are not generated yet. It is planned to be implemented. Such feature is needed to make more accurate interlock insn scheduling to get state describing functional - unit reservation in a joint CFG point. -*/ + unit reservation in a joint CFG point. */ /* This file code processes constructions of machine description file which describes automaton used for recognition of processor pipeline @@ -67,7 +67,8 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA The translator functions `gen_cpu_unit', `gen_query_cpu_unit', `gen_bypass', `gen_excl_set', `gen_presence_set', - `gen_absence_set', `gen_automaton', `gen_automata_option', + `gen_final_presence_set', `gen_absence_set', + `gen_final_absence_set', `gen_automaton', `gen_automata_option', `gen_reserv', `gen_insn_reserv' are called from file `genattrtab.c'. 
They transform RTL constructions describing automata in .md file into internal representation convenient for @@ -166,7 +167,7 @@ struct unit_decl; struct bypass_decl; struct result_decl; struct automaton_decl; -struct unit_rel_decl; +struct unit_pattern_rel_decl; struct reserv_decl; struct insn_reserv_decl; struct decl; @@ -181,6 +182,8 @@ struct oneof_regexp; struct regexp; struct description; struct unit_set_el; +struct pattern_set_el; +struct pattern_reserv; struct state; struct alt_state; struct arc; @@ -193,6 +196,8 @@ typedef struct unit_decl *unit_decl_t; typedef struct decl *decl_t; typedef struct regexp *regexp_t; typedef struct unit_set_el *unit_set_el_t; +typedef struct pattern_set_el *pattern_set_el_t; +typedef struct pattern_reserv *pattern_reserv_t; typedef struct alt_state *alt_state_t; typedef struct state *state_t; typedef struct arc *arc_t; @@ -203,28 +208,30 @@ typedef struct state_ainsn_table *state_ainsn_table_t; /* Prototypes of functions gen_cpu_unit, gen_query_cpu_unit, - gen_bypass, gen_excl_set, gen_presence_set, gen_absence_set, - gen_automaton, gen_automata_option, gen_reserv, gen_insn_reserv, + gen_bypass, gen_excl_set, gen_presence_set, gen_final_presence_set, + gen_absence_set, gen_final_absence_set, gen_automaton, + gen_automata_option, gen_reserv, gen_insn_reserv, initiate_automaton_gen, expand_automata, write_automata are described on the file top because the functions are called from function `main'. 
*/ -static void *create_node PARAMS ((size_t)); -static void *copy_node PARAMS ((const void *, size_t)); -static char *check_name PARAMS ((char *, pos_t)); -static char *next_sep_el PARAMS ((char **, int, int)); -static int n_sep_els PARAMS ((char *, int, int)); -static char **get_str_vect PARAMS ((char *, int *, int, int)); -static regexp_t gen_regexp_el PARAMS ((char *)); -static regexp_t gen_regexp_repeat PARAMS ((char *)); -static regexp_t gen_regexp_allof PARAMS ((char *)); -static regexp_t gen_regexp_oneof PARAMS ((char *)); -static regexp_t gen_regexp_sequence PARAMS ((char *)); -static regexp_t gen_regexp PARAMS ((char *)); - -static unsigned string_hash PARAMS ((const char *)); -static hashval_t automaton_decl_hash PARAMS ((const void *)); -static int automaton_decl_eq_p PARAMS ((const void *, +static void *create_node PARAMS ((size_t)); +static void *copy_node PARAMS ((const void *, size_t)); +static char *check_name PARAMS ((char *, pos_t)); +static char *next_sep_el PARAMS ((char **, int, int)); +static int n_sep_els PARAMS ((char *, int, int)); +static char **get_str_vect PARAMS ((char *, int *, int, int)); +static void gen_presence_absence_set PARAMS ((rtx, int, int)); +static regexp_t gen_regexp_el PARAMS ((char *)); +static regexp_t gen_regexp_repeat PARAMS ((char *)); +static regexp_t gen_regexp_allof PARAMS ((char *)); +static regexp_t gen_regexp_oneof PARAMS ((char *)); +static regexp_t gen_regexp_sequence PARAMS ((char *)); +static regexp_t gen_regexp PARAMS ((char *)); + +static unsigned string_hash PARAMS ((const char *)); +static unsigned automaton_decl_hash PARAMS ((const void *)); +static int automaton_decl_eq_p PARAMS ((const void *, const void *)); static decl_t insert_automaton_decl PARAMS ((decl_t)); static decl_t find_automaton_decl PARAMS ((char *)); @@ -250,10 +257,15 @@ static void finish_decl_table PARAMS ((void)); static unit_set_el_t process_excls PARAMS ((char **, int, pos_t)); static void add_excls PARAMS ((unit_set_el_t, 
unit_set_el_t, pos_t)); -static unit_set_el_t process_presence_absence - PARAMS ((char **, int, pos_t, int)); -static void add_presence_absence PARAMS ((unit_set_el_t, unit_set_el_t, - pos_t, int)); +static unit_set_el_t process_presence_absence_names + PARAMS ((char **, int, pos_t, + int, int)); +static pattern_set_el_t process_presence_absence_patterns + PARAMS ((char ***, int, pos_t, + int, int)); +static void add_presence_absence PARAMS ((unit_set_el_t, + pattern_set_el_t, + pos_t, int, int)); static void process_decls PARAMS ((void)); static struct bypass_decl *find_bypass PARAMS ((struct bypass_decl *, struct insn_reserv_decl *)); @@ -263,7 +275,8 @@ static void process_regexp_decls PARAMS ((void)); static void check_usage PARAMS ((void)); static int loop_in_regexp PARAMS ((regexp_t, decl_t)); static void check_loops_in_regexps PARAMS ((void)); -static int process_regexp_cycles PARAMS ((regexp_t, int)); +static void process_regexp_cycles PARAMS ((regexp_t, int, int, + int *, int *)); static void evaluate_max_reserv_cycles PARAMS ((void)); static void check_all_description PARAMS ((void)); @@ -310,8 +323,8 @@ static int state_eq_p PARAMS ((const void *, const void *)); static state_t insert_state PARAMS ((state_t)); static void set_state_reserv PARAMS ((state_t, int, int)); static int intersected_state_reservs_p PARAMS ((state_t, state_t)); -static state_t states_union PARAMS ((state_t, state_t)); -static state_t state_shift PARAMS ((state_t)); +static state_t states_union PARAMS ((state_t, state_t, reserv_sets_t)); +static state_t state_shift PARAMS ((state_t, reserv_sets_t)); static void initiate_states PARAMS ((void)); static void finish_states PARAMS ((void)); @@ -338,8 +351,12 @@ static void finish_automata_lists PARAMS ((void)); static void initiate_excl_sets PARAMS ((void)); static reserv_sets_t get_excl_set PARAMS ((reserv_sets_t)); -static void initiate_presence_absence_sets PARAMS ((void)); -static reserv_sets_t get_presence_absence_set PARAMS 
((reserv_sets_t, int)); +static pattern_reserv_t form_reserv_sets_list PARAMS ((pattern_set_el_t)); +static void initiate_presence_absence_pattern_sets PARAMS ((void)); +static int check_presence_pattern_sets PARAMS ((reserv_sets_t, + reserv_sets_t, int)); +static int check_absence_pattern_sets PARAMS ((reserv_sets_t, reserv_sets_t, + int)); static regexp_t copy_insn_regexp PARAMS ((regexp_t)); static regexp_t transform_1 PARAMS ((regexp_t)); @@ -350,10 +367,9 @@ static regexp_t regexp_transform_func static regexp_t transform_regexp PARAMS ((regexp_t)); static void transform_insn_regexps PARAMS ((void)); -static void process_unit_to_form_the_same_automaton_unit_lists - PARAMS ((regexp_t, regexp_t, int)); -static void form_the_same_automaton_unit_lists_from_regexp PARAMS ((regexp_t)); -static void form_the_same_automaton_unit_lists PARAMS ((void)); +static void check_unit_distribution_in_reserv PARAMS ((const char *, regexp_t, + regexp_t, int)); +static void check_regexp_units_distribution PARAMS ((const char *, regexp_t)); static void check_unit_distributions_to_automata PARAMS ((void)); static int process_seq_for_forming_states PARAMS ((regexp_t, automaton_t, @@ -366,9 +382,10 @@ static void create_alt_states PARAMS ((automaton_t)); static void form_ainsn_with_same_reservs PARAMS ((automaton_t)); +static reserv_sets_t form_reservs_matter PARAMS ((automaton_t)); static void make_automaton PARAMS ((automaton_t)); static void form_arcs_marked_by_insn PARAMS ((state_t)); -static void create_composed_state PARAMS ((state_t, arc_t, vla_ptr_t *)); +static int create_composed_state PARAMS ((state_t, arc_t, vla_ptr_t *)); static void NDFA_to_DFA PARAMS ((automaton_t)); static void pass_state_graph PARAMS ((state_t, void (*) (state_t))); static void pass_states PARAMS ((automaton_t, @@ -379,7 +396,8 @@ static int set_out_arc_insns_equiv_num PARAMS ((state_t, int)); static void clear_arc_insns_equiv_num PARAMS ((state_t)); static void copy_equiv_class PARAMS ((vla_ptr_t *to, 
const vla_ptr_t *from)); -static int state_is_differed PARAMS ((state_t, int, int)); +static int first_cycle_unit_presence PARAMS ((state_t, int)); +static int state_is_differed PARAMS ((state_t, state_t, int, int)); static state_t init_equiv_class PARAMS ((state_t *states, int)); static int partition_equiv_class PARAMS ((state_t *, int, vla_ptr_t *, int *)); @@ -449,7 +467,7 @@ static void add_vect_el PARAMS ((vla_hwint_t *, static void add_states_vect_el PARAMS ((state_t)); static void output_trans_table PARAMS ((automaton_t)); static void output_state_alts_table PARAMS ((automaton_t)); -static int min_issue_delay_pass_states PARAMS ((state_t, ainsn_t)); +static int min_issue_delay_pass_states PARAMS ((state_t, ainsn_t)); static int min_issue_delay PARAMS ((state_t, ainsn_t)); static void initiate_min_issue_delay_pass_states PARAMS ((void)); static void output_min_issue_delay_table PARAMS ((automaton_t)); @@ -483,11 +501,13 @@ static int units_cmp PARAMS ((const void *, const void *)); static void output_get_cpu_unit_code_func PARAMS ((void)); static void output_cpu_unit_reservation_p PARAMS ((void)); +static void output_dfa_clean_insn_cache_func PARAMS ((void)); static void output_dfa_start_func PARAMS ((void)); static void output_dfa_finish_func PARAMS ((void)); static void output_regexp PARAMS ((regexp_t )); static void output_unit_set_el_list PARAMS ((unit_set_el_t)); +static void output_pattern_set_el_list PARAMS ((pattern_set_el_t)); static void output_description PARAMS ((void)); static void output_automaton_name PARAMS ((FILE *, automaton_t)); static void output_automaton_units PARAMS ((automaton_t)); @@ -714,14 +734,6 @@ struct unit_decl regexp. */ char unit_is_used; - /* The following field value is used to form cyclic lists of units - which should be in the same automaton because the unit is - reserved not on all alternatives of a regexp on a cycle. 
*/ - unit_decl_t the_same_automaton_unit; - /* The following field is TRUE if we already reported that the unit - is not in the same automaton. */ - int the_same_automaton_message_reported_p; - /* The following field value is order number (0, 1, ...) of given unit. */ int unit_num; @@ -733,15 +745,21 @@ struct unit_decl which given unit occurs in insns. Zero value means that given unit is not used in insns. */ int max_occ_cycle_num; + /* The following field value is minimal cycle number (0, ...) on + which given unit occurs in insns. -1 value means that given + unit is not used in insns. */ + int min_occ_cycle_num; /* The following list contains units which conflict with given unit. */ unit_set_el_t excl_list; - /* The following list contains units which are required to + /* The following list contains patterns which are required for reservation of given unit. */ - unit_set_el_t presence_list; - /* The following list contains units which should be not present in - reservation for given unit. */ - unit_set_el_t absence_list; + pattern_set_el_t presence_list; + pattern_set_el_t final_presence_list; + /* The following list contains patterns which should not be present + in reservation for given unit. */ + pattern_set_el_t absence_list; + pattern_set_el_t final_absence_list; /* The following is used only when `query_p' has nonzero value. This is query number for the unit. */ int query_num; @@ -751,6 +769,11 @@ struct unit_decl /* The following field value is number of the automaton to which given unit belongs. */ int corresponding_automaton_num; + /* If the following value is not zero, the cpu unit is present in an + `exclusion_set' or in the right part of a `presence_set', + `final_presence_set', `absence_set', or + `final_absence_set'. */ + char in_set_p; }; /* This describes define_bypass (see file rtl.def).
*/ @@ -790,15 +813,26 @@ struct automaton_decl automaton_t corresponding_automaton; }; -/* This describes unit relations: exclusion_set, presence_set, or - absence_set (see file rtl.def). */ -struct unit_rel_decl +/* This describes exclusion relations: exclusion_set (see file + rtl.def). */ +struct excl_rel_decl { - int names_num; + int all_names_num; int first_list_length; char *names [1]; }; +/* This describes unit relations: [final_]presence_set or + [final_]absence_set (see file rtl.def). */ +struct unit_pattern_rel_decl +{ + int final_p; + int names_num; + int patterns_num; + char **names; + char ***patterns; +}; + /* This describes define_reservation (see file rtl.def). */ struct reserv_decl { @@ -872,9 +906,9 @@ struct decl struct unit_decl unit; struct bypass_decl bypass; struct automaton_decl automaton; - struct unit_rel_decl excl; - struct unit_rel_decl presence; - struct unit_rel_decl absence; + struct excl_rel_decl excl; + struct unit_pattern_rel_decl presence; + struct unit_pattern_rel_decl absence; struct reserv_decl reserv; struct insn_reserv_decl insn_reserv; } decl; @@ -993,22 +1027,41 @@ struct description }; - /* The following nodes are created in automaton checker. */ -/* The following nodes represent exclusion, presence, absence set for - cpu units. Each element are accessed through only one excl_list, - presence_list, absence_list. */ +/* The following nodes represent exclusion set for cpu units. Each + element is accessed through only one excl_list. */ struct unit_set_el { unit_decl_t unit_decl; unit_set_el_t next_unit_set_el; }; +/* The following nodes represent presence or absence pattern for cpu + units. Each element is accessed through only one presence_list or + absence_list. */ +struct pattern_set_el +{ + /* The number of units in unit_decls. */ + int units_num; + /* The units forming the pattern. */ + struct unit_decl **unit_decls; + pattern_set_el_t next_pattern_set_el; +}; /* The following nodes are created in automaton generator. 
*/ + +/* The following nodes represent presence or absence pattern for cpu + units. Each element is accessed through only one element of + unit_presence_set_table or unit_absence_set_table. */ +struct pattern_reserv +{ + reserv_sets_t reserv; + pattern_reserv_t next_pattern_reserv; +}; + /* The following node type describes state automaton. The state may be deterministic or non-deterministic. Non-deterministic state has several component states which represent alternative cpu units @@ -1035,11 +1088,12 @@ struct state char it_was_placed_in_stack_for_NDFA_forming; /* The following field is used to form DFA. */ char it_was_placed_in_stack_for_DFA_forming; - /* The following field is used to transform NDFA to DFA. The field - value is not NULL if the state is a compound state. In this case - the value of field `unit_sets_list' is NULL. All states in the - list are in the hash table. The list is formed through field - `next_sorted_alt_state'. */ + /* The following field is used to transform NDFA to DFA and DFA + minimization. The field value is not NULL if the state is a + compound state. In this case the value of field `unit_sets_list' + is NULL. All states in the list are in the hash table. The list + is formed through field `next_sorted_alt_state'. We should + support only one level of nesting state. */ alt_state_t component_states; /* The following field is used for passing graph of states. */ int pass_num; @@ -1578,30 +1632,32 @@ n_sep_els (s, sep, par_flag) /* Given a string and a separator, return vector of strings which are elements in the string and number of elements through els_num. - Take parentheses into account if PAR_FLAG has nonzero value. + Take parentheses into account if PAREN_P has nonzero value. The + function also inserts the end marker NULL at the end of vector. Return 0 for the null string, -1 if parentheses are not balanced.
*/ static char ** -get_str_vect (str, els_num, sep, par_flag) +get_str_vect (str, els_num, sep, paren_p) char *str; int *els_num; int sep; - int par_flag; + int paren_p; { int i; char **vect; char **pstr; - *els_num = n_sep_els (str, sep, par_flag); + *els_num = n_sep_els (str, sep, paren_p); if (*els_num <= 0) return NULL; - obstack_blank (&irp, sizeof (char *) * (*els_num)); + obstack_blank (&irp, sizeof (char *) * (*els_num + 1)); vect = (char **) obstack_base (&irp); obstack_finish (&irp); pstr = &str; for (i = 0; i < *els_num; i++) - vect [i] = next_sep_el (pstr, sep, par_flag); - if (next_sep_el (pstr, sep, par_flag) != NULL) + vect [i] = next_sep_el (pstr, sep, paren_p); + if (next_sep_el (pstr, sep, paren_p) != NULL) abort (); + vect [i] = NULL; return vect; } @@ -1618,7 +1674,8 @@ gen_cpu_unit (def) int vect_length; int i; - str_cpu_units = get_str_vect ((char *) XSTR (def, 0), &vect_length, ',', 0); + str_cpu_units = get_str_vect ((char *) XSTR (def, 0), &vect_length, ',', + FALSE); if (str_cpu_units == NULL) fatal ("invalid string `%s' in define_cpu_unit", XSTR (def, 0)); for (i = 0; i < vect_length; i++) @@ -1629,6 +1686,8 @@ gen_cpu_unit (def) DECL_UNIT (decl)->name = check_name (str_cpu_units [i], decl->pos); DECL_UNIT (decl)->automaton_name = (char *) XSTR (def, 1); DECL_UNIT (decl)->query_p = 0; + DECL_UNIT (decl)->min_occ_cycle_num = -1; + DECL_UNIT (decl)->in_set_p = 0; VLA_PTR_ADD (decls, decl); num_dfa_decls++; } @@ -1647,7 +1706,8 @@ gen_query_cpu_unit (def) int vect_length; int i; - str_cpu_units = get_str_vect ((char *) XSTR (def, 0), &vect_length, ',', 0); + str_cpu_units = get_str_vect ((char *) XSTR (def, 0), &vect_length, ',', + FALSE); if (str_cpu_units == NULL) fatal ("invalid string `%s' in define_query_cpu_unit", XSTR (def, 0)); for (i = 0; i < vect_length; i++) @@ -1679,10 +1739,10 @@ gen_bypass (def) int in_length; int i, j; - out_insns = get_str_vect ((char *) XSTR (def, 1), &out_length, ',', 0); + out_insns = get_str_vect ((char 
*) XSTR (def, 1), &out_length, ',', FALSE); if (out_insns == NULL) fatal ("invalid string `%s' in define_bypass", XSTR (def, 1)); - in_insns = get_str_vect ((char *) XSTR (def, 2), &in_length, ',', 0); + in_insns = get_str_vect ((char *) XSTR (def, 2), &in_length, ',', FALSE); if (in_insns == NULL) fatal ("invalid string `%s' in define_bypass", XSTR (def, 2)); for (i = 0; i < out_length; i++) @@ -1703,7 +1763,7 @@ gen_bypass (def) /* Process an EXCLUSION_SET. This gives information about a cpu unit conflicts. We fill a - struct unit_rel_decl (excl) with information used later by + struct excl_rel_decl (excl) with information used later by `expand_automata'. */ void gen_excl_set (def) @@ -1717,18 +1777,18 @@ gen_excl_set (def) int i; first_str_cpu_units - = get_str_vect ((char *) XSTR (def, 0), &first_vect_length, ',', 0); + = get_str_vect ((char *) XSTR (def, 0), &first_vect_length, ',', FALSE); if (first_str_cpu_units == NULL) fatal ("invalid first string `%s' in exclusion_set", XSTR (def, 0)); second_str_cpu_units = get_str_vect ((char *) XSTR (def, 1), &length, ',', - 0); + FALSE); if (second_str_cpu_units == NULL) fatal ("invalid second string `%s' in exclusion_set", XSTR (def, 1)); length += first_vect_length; decl = create_node (sizeof (struct decl) + (length - 1) * sizeof (char *)); decl->mode = dm_excl; decl->pos = 0; - DECL_EXCL (decl)->names_num = length; + DECL_EXCL (decl)->all_names_num = length; DECL_EXCL (decl)->first_list_length = first_vect_length; for (i = 0; i < length; i++) if (i < first_vect_length) @@ -1740,86 +1800,126 @@ gen_excl_set (def) num_dfa_decls++; } -/* Process a PRESENCE_SET. +/* Process a PRESENCE_SET, a FINAL_PRESENCE_SET, an ABSENCE_SET, or + a FINAL_ABSENCE_SET (which one depends on PRESENCE_P and FINAL_P). This gives information about a cpu unit reservation requirements. - We fill a struct unit_rel_decl (presence) with information used - later by `expand_automata'.
*/ -void -gen_presence_set (def) + We fill a struct unit_pattern_rel_decl with information used later + by `expand_automata'. */ +static void +gen_presence_absence_set (def, presence_p, final_p) rtx def; + int presence_p; + int final_p; { decl_t decl; - char **first_str_cpu_units; - char **second_str_cpu_units; - int first_vect_length; + char **str_cpu_units; + char ***str_patterns; + int cpu_units_length; int length; + int patterns_length; int i; - first_str_cpu_units - = get_str_vect ((char *) XSTR (def, 0), &first_vect_length, ',', 0); - if (first_str_cpu_units == NULL) - fatal ("invalid first string `%s' in presence_set", XSTR (def, 0)); - second_str_cpu_units = get_str_vect ((char *) XSTR (def, 1), &length, ',', - 0); - if (second_str_cpu_units == NULL) - fatal ("invalid second string `%s' in presence_set", XSTR (def, 1)); - length += first_vect_length; - decl = create_node (sizeof (struct decl) + (length - 1) * sizeof (char *)); - decl->mode = dm_presence; + str_cpu_units = get_str_vect ((char *) XSTR (def, 0), &cpu_units_length, ',', + FALSE); + if (str_cpu_units == NULL) + fatal ((presence_p + ? (final_p + ? "invalid first string `%s' in final_presence_set" + : "invalid first string `%s' in presence_set") + : (final_p + ? "invalid first string `%s' in final_absence_set" + : "invalid first string `%s' in absence_set")), + XSTR (def, 0)); + str_patterns = (char ***) get_str_vect ((char *) XSTR (def, 1), + &patterns_length, ',', FALSE); + if (str_patterns == NULL) + fatal ((presence_p + ? (final_p + ? "invalid second string `%s' in final_presence_set" + : "invalid second string `%s' in presence_set") + : (final_p + ? 
"invalid second string `%s' in final_absence_set" + : "invalid second string `%s' in absence_set")), XSTR (def, 1)); + for (i = 0; i < patterns_length; i++) + { + str_patterns [i] = get_str_vect ((char *) str_patterns [i], &length, ' ', + FALSE); + if (str_patterns [i] == NULL) + abort (); + } + decl = create_node (sizeof (struct decl)); decl->pos = 0; - DECL_PRESENCE (decl)->names_num = length; - DECL_PRESENCE (decl)->first_list_length = first_vect_length; - for (i = 0; i < length; i++) - if (i < first_vect_length) - DECL_PRESENCE (decl)->names [i] = first_str_cpu_units [i]; - else - DECL_PRESENCE (decl)->names [i] - = second_str_cpu_units [i - first_vect_length]; + if (presence_p) + { + decl->mode = dm_presence; + DECL_PRESENCE (decl)->names_num = cpu_units_length; + DECL_PRESENCE (decl)->names = str_cpu_units; + DECL_PRESENCE (decl)->patterns = str_patterns; + DECL_PRESENCE (decl)->patterns_num = patterns_length; + DECL_PRESENCE (decl)->final_p = final_p; + } + else + { + decl->mode = dm_absence; + DECL_ABSENCE (decl)->names_num = cpu_units_length; + DECL_ABSENCE (decl)->names = str_cpu_units; + DECL_ABSENCE (decl)->patterns = str_patterns; + DECL_ABSENCE (decl)->patterns_num = patterns_length; + DECL_ABSENCE (decl)->final_p = final_p; + } VLA_PTR_ADD (decls, decl); num_dfa_decls++; } -/* Process an ABSENCE_SET. +/* Process a PRESENCE_SET. + + This gives information about a cpu unit reservation requirements. + We fill a struct unit_pattern_rel_decl (presence) with information + used later by `expand_automata'. */ + void +gen_presence_set (def) + rtx def; +{ + gen_presence_absence_set (def, TRUE, FALSE); +} + +/* Process a FINAL_PRESENCE_SET. + + This gives information about a cpu unit reservation requirements. + We fill a struct unit_pattern_rel_decl (presence) with information + used later by `expand_automata'. */ +void +gen_final_presence_set (def) + rtx def; +{ + gen_presence_absence_set (def, TRUE, TRUE); +} + +/* Process an ABSENCE_SET. 
This gives information about a cpu unit reservation requirements. - We fill a struct unit_rel_decl (absence) with information used - later by `expand_automata'. */ + We fill a struct unit_pattern_rel_decl (absence) with information + used later by `expand_automata'. */ void gen_absence_set (def) rtx def; { - decl_t decl; - char **first_str_cpu_units; - char **second_str_cpu_units; - int first_vect_length; - int length; - int i; - - first_str_cpu_units - = get_str_vect ((char *) XSTR (def, 0), &first_vect_length, ',', 0); - if (first_str_cpu_units == NULL) - fatal ("invalid first string `%s' in absence_set", XSTR (def, 0)); - second_str_cpu_units = get_str_vect ((char *) XSTR (def, 1), &length, ',', - 0); - if (second_str_cpu_units == NULL) - fatal ("invalid second string `%s' in absence_set", XSTR (def, 1)); - length += first_vect_length; - decl = create_node (sizeof (struct decl) + (length - 1) * sizeof (char *)); - decl->mode = dm_absence; - decl->pos = 0; - DECL_ABSENCE (decl)->names_num = length; - DECL_ABSENCE (decl)->first_list_length = first_vect_length; - for (i = 0; i < length; i++) - if (i < first_vect_length) - DECL_ABSENCE (decl)->names [i] = first_str_cpu_units [i]; - else - DECL_ABSENCE (decl)->names [i] - = second_str_cpu_units [i - first_vect_length]; - VLA_PTR_ADD (decls, decl); - num_dfa_decls++; + gen_presence_absence_set (def, FALSE, FALSE); } + +/* Process a FINAL_ABSENCE_SET. + This gives information about a cpu unit reservation requirements. + We fill a struct unit_pattern_rel_decl (absence) with information + used later by `expand_automata'. */ +void +gen_final_absence_set (def) + rtx def; +{ + gen_presence_absence_set (def, FALSE, TRUE); +} + /* Process a DEFINE_AUTOMATON. 
This gives information about a finite state automaton used for @@ -1834,7 +1934,8 @@ gen_automaton (def) int vect_length; int i; - str_automata = get_str_vect ((char *) XSTR (def, 0), &vect_length, ',', 0); + str_automata = get_str_vect ((char *) XSTR (def, 0), &vect_length, ',', + FALSE); if (str_automata == NULL) fatal ("invalid string `%s' in define_automaton", XSTR (def, 0)); for (i = 0; i < vect_length; i++) @@ -1918,7 +2019,7 @@ gen_regexp_repeat (str) int els_num; int i; - repeat_vect = get_str_vect (str, &els_num, '*', 1); + repeat_vect = get_str_vect (str, &els_num, '*', TRUE); if (repeat_vect == NULL) fatal ("invalid `%s' in reservation `%s'", str, reserv_str); if (els_num > 1) @@ -1951,7 +2052,7 @@ gen_regexp_allof (str) int els_num; int i; - allof_vect = get_str_vect (str, &els_num, '+', 1); + allof_vect = get_str_vect (str, &els_num, '+', TRUE); if (allof_vect == NULL) fatal ("invalid `%s' in reservation `%s'", str, reserv_str); if (els_num > 1) @@ -1978,7 +2079,7 @@ gen_regexp_oneof (str) int els_num; int i; - oneof_vect = get_str_vect (str, &els_num, '|', 1); + oneof_vect = get_str_vect (str, &els_num, '|', TRUE); if (oneof_vect == NULL) fatal ("invalid `%s' in reservation `%s'", str, reserv_str); if (els_num > 1) @@ -2005,7 +2106,7 @@ gen_regexp_sequence (str) int els_num; int i; - sequence_vect = get_str_vect (str, &els_num, ',', 1); + sequence_vect = get_str_vect (str, &els_num, ',', TRUE); if (els_num > 1) { sequence = create_node (sizeof (struct regexp) @@ -2494,15 +2595,16 @@ add_excls (dest_list, source_list, excl_pos) } } -/* Checking NAMES in a presence clause vector and returning formed - unit_set_el_list. The function is called only after processing all - exclusion sets. */ +/* Checking NAMES in presence/absence clause and returning the + formed unit_set_el_list. The function is called only after + processing all exclusion sets. 
*/ static unit_set_el_t -process_presence_absence (names, num, req_pos, presence_p) +process_presence_absence_names (names, num, req_pos, presence_p, final_p) char **names; int num; pos_t req_pos ATTRIBUTE_UNUSED; int presence_p; + int final_p; { unit_set_el_t el_list; unit_set_el_t last_el; @@ -2517,12 +2619,20 @@ process_presence_absence (names, num, req_pos, presence_p) decl_in_table = find_decl (names [i]); if (decl_in_table == NULL) error ((presence_p - ? "unit `%s' in presence set is not declared" - : "unit `%s' in absence set is not declared"), names [i]); + ? (final_p + ? "unit `%s' in final presence set is not declared" + : "unit `%s' in presence set is not declared") + : (final_p + ? "unit `%s' in final absence set is not declared" + : "unit `%s' in absence set is not declared")), names [i]); else if (decl_in_table->mode != dm_unit) error ((presence_p - ? "`%s' in presence set is not unit" - : "`%s' in absence set is not unit"), names [i]); + ? (final_p + ? "`%s' in final presence set is not unit" + : "`%s' in presence set is not unit") + : (final_p + ? "`%s' in final absence set is not unit" + : "`%s' in absence set is not unit")), names [i]); else { new_el = create_node (sizeof (struct unit_set_el)); @@ -2540,116 +2650,202 @@ process_presence_absence (names, num, req_pos, presence_p) return el_list; } -/* The function adds each element from SOURCE_LIST to presence (if +/* Checking NAMES in patterns of a presence/absence clause and + returning the formed pattern_set_el_list. The function is called + only after processing all exclusion sets. 
*/ +static pattern_set_el_t +process_presence_absence_patterns (patterns, num, req_pos, presence_p, final_p) + char ***patterns; + int num; + pos_t req_pos ATTRIBUTE_UNUSED; + int presence_p; + int final_p; +{ + pattern_set_el_t el_list; + pattern_set_el_t last_el; + pattern_set_el_t new_el; + decl_t decl_in_table; + int i, j; + + el_list = NULL; + last_el = NULL; + for (i = 0; i < num; i++) + { + for (j = 0; patterns [i] [j] != NULL; j++) + ; + new_el = create_node (sizeof (struct pattern_set_el) + + sizeof (struct unit_decl *) * j); + new_el->unit_decls + = (struct unit_decl **) ((char *) new_el + + sizeof (struct pattern_set_el)); + new_el->next_pattern_set_el = NULL; + if (last_el == NULL) + el_list = last_el = new_el; + else + { + last_el->next_pattern_set_el = new_el; + last_el = last_el->next_pattern_set_el; + } + new_el->units_num = 0; + for (j = 0; patterns [i] [j] != NULL; j++) + { + decl_in_table = find_decl (patterns [i] [j]); + if (decl_in_table == NULL) + error ((presence_p + ? (final_p + ? "unit `%s' in final presence set is not declared" + : "unit `%s' in presence set is not declared") + : (final_p + ? "unit `%s' in final absence set is not declared" + : "unit `%s' in absence set is not declared")), + patterns [i] [j]); + else if (decl_in_table->mode != dm_unit) + error ((presence_p + ? (final_p + ? "`%s' in final presence set is not unit" + : "`%s' in presence set is not unit") + : (final_p + ? "`%s' in final absence set is not unit" + : "`%s' in absence set is not unit")), + patterns [i] [j]); + else + { + new_el->unit_decls [new_el->units_num] + = DECL_UNIT (decl_in_table); + new_el->units_num++; + } + } + } + return el_list; +} + +/* The function adds each element from PATTERN_LIST to presence (if PRESENCE_P) or absence list of the each element from DEST_LIST. - Checking situations "unit requires own presence", "unit requires - own absence", and "unit excludes and requires presence of ...". 
- Remember that we process absence sets only after all presence - sets. */ -static void -add_presence_absence (dest_list, source_list, req_pos, presence_p) + Checking situations "unit requires own absence", and "unit excludes + and requires presence of ...", "unit requires absence and presence + of ...", "units in (final) presence set belong to different + automata", and "units in (final) absence set belong to different + automata". Remember that we process absence sets only after all + presence sets. */ +static void +add_presence_absence (dest_list, pattern_list, req_pos, presence_p, final_p) unit_set_el_t dest_list; - unit_set_el_t source_list; + pattern_set_el_t pattern_list; pos_t req_pos ATTRIBUTE_UNUSED; int presence_p; + int final_p; { unit_set_el_t dst; - unit_set_el_t src; - unit_set_el_t curr_el; - unit_set_el_t prev_el; - unit_set_el_t copy; + pattern_set_el_t pat; + struct unit_decl *unit; + unit_set_el_t curr_excl_el; + pattern_set_el_t curr_pat_el; + pattern_set_el_t prev_el; + pattern_set_el_t copy; + int i; + int no_error_flag; for (dst = dest_list; dst != NULL; dst = dst->next_unit_set_el) - for (src = source_list; src != NULL; src = src->next_unit_set_el) + for (pat = pattern_list; pat != NULL; pat = pat->next_pattern_set_el) { - if (dst->unit_decl == src->unit_decl) - { - error ((presence_p - ? "unit `%s' requires own presence" - : "unit `%s' requires own absence"), src->unit_decl->name); - continue; - } - if (dst->unit_decl->automaton_name != NULL - && src->unit_decl->automaton_name != NULL - && strcmp (dst->unit_decl->automaton_name, - src->unit_decl->automaton_name) != 0) - { - error ((presence_p - ? "units `%s' and `%s' in presence set belong to different automata" - : "units `%s' and `%s' in absence set belong to different automata"), - src->unit_decl->name, dst->unit_decl->name); - continue; - } - for (curr_el = (presence_p - ? 
dst->unit_decl->presence_list - : dst->unit_decl->absence_list), prev_el = NULL; - curr_el != NULL; - prev_el = curr_el, curr_el = curr_el->next_unit_set_el) - if (curr_el->unit_decl == src->unit_decl) - break; - if (curr_el == NULL) + for (i = 0; i < pat->units_num; i++) { - /* Element not found - insert if there is no error. */ - int no_error_flag = 1; - + unit = pat->unit_decls [i]; + if (dst->unit_decl == unit && pat->units_num == 1 && !presence_p) + { + error ("unit `%s' requires own absence", unit->name); + continue; + } + if (dst->unit_decl->automaton_name != NULL + && unit->automaton_name != NULL + && strcmp (dst->unit_decl->automaton_name, + unit->automaton_name) != 0) + { + error ((presence_p + ? (final_p + ? "units `%s' and `%s' in final presence set belong to different automata" + : "units `%s' and `%s' in presence set belong to different automata") + : (final_p + ? "units `%s' and `%s' in final absence set belong to different automata" + : "units `%s' and `%s' in absence set belong to different automata")), + unit->name, dst->unit_decl->name); + continue; + } + no_error_flag = 1; if (presence_p) - for (curr_el = dst->unit_decl->excl_list; - curr_el != NULL; - curr_el = curr_el->next_unit_set_el) + for (curr_excl_el = dst->unit_decl->excl_list; + curr_excl_el != NULL; + curr_excl_el = curr_excl_el->next_unit_set_el) { - if (src->unit_decl == curr_el->unit_decl) + if (unit == curr_excl_el->unit_decl && pat->units_num == 1) { if (!w_flag) { - error - ("unit `%s' excludes and requires presence of `%s'", - dst->unit_decl->name, src->unit_decl->name); + error ("unit `%s' excludes and requires presence of `%s'", + dst->unit_decl->name, unit->name); no_error_flag = 0; } else warning ("unit `%s' excludes and requires presence of `%s'", - dst->unit_decl->name, src->unit_decl->name); + dst->unit_decl->name, unit->name); } } - else - for (curr_el = dst->unit_decl->presence_list; - curr_el != NULL; - curr_el = curr_el->next_unit_set_el) - { - if (src->unit_decl == 
curr_el->unit_decl) - { - if (!w_flag) - { - error - ("unit `%s' requires absence and presence of `%s'", - dst->unit_decl->name, src->unit_decl->name); - no_error_flag = 0; - } - else - warning + else if (pat->units_num == 1) + for (curr_pat_el = dst->unit_decl->presence_list; + curr_pat_el != NULL; + curr_pat_el = curr_pat_el->next_pattern_set_el) + if (curr_pat_el->units_num == 1 + && unit == curr_pat_el->unit_decls [0]) + { + if (!w_flag) + { + error ("unit `%s' requires absence and presence of `%s'", - dst->unit_decl->name, src->unit_decl->name); - } - } + dst->unit_decl->name, unit->name); + no_error_flag = 0; + } + else + warning + ("unit `%s' requires absence and presence of `%s'", + dst->unit_decl->name, unit->name); + } if (no_error_flag) { - copy = copy_node (src, sizeof (*src)); - copy->next_unit_set_el = NULL; + for (prev_el = (presence_p + ? (final_p + ? dst->unit_decl->final_presence_list + : dst->unit_decl->final_presence_list) + : (final_p + ? dst->unit_decl->final_absence_list + : dst->unit_decl->absence_list)); + prev_el != NULL && prev_el->next_pattern_set_el != NULL; + prev_el = prev_el->next_pattern_set_el) + ; + copy = copy_node (pat, sizeof (*pat)); + copy->next_pattern_set_el = NULL; if (prev_el == NULL) { if (presence_p) - dst->unit_decl->presence_list = copy; + { + if (final_p) + dst->unit_decl->final_presence_list = copy; + else + dst->unit_decl->presence_list = copy; + } + else if (final_p) + dst->unit_decl->final_absence_list = copy; else dst->unit_decl->absence_list = copy; } else - prev_el->next_unit_set_el = copy; + prev_el->next_pattern_set_el = copy; } - } - } + } + } } + /* The function searches for bypass with given IN_INSN_RESERV in given BYPASS_LIST. 
*/ static struct bypass_decl * @@ -2849,7 +3045,7 @@ process_decls () unit_set_el_list_2 = process_excls (&DECL_EXCL (decl)->names [DECL_EXCL (decl)->first_list_length], - DECL_EXCL (decl)->names_num + DECL_EXCL (decl)->all_names_num - DECL_EXCL (decl)->first_list_length, decl->pos); add_excls (unit_set_el_list, unit_set_el_list_2, decl->pos); @@ -2864,21 +3060,20 @@ process_decls () if (decl->mode == dm_presence) { unit_set_el_t unit_set_el_list; - unit_set_el_t unit_set_el_list_2; + pattern_set_el_t pattern_set_el_list; unit_set_el_list - = process_presence_absence - (DECL_PRESENCE (decl)->names, - DECL_PRESENCE (decl)->first_list_length, decl->pos, 1); - unit_set_el_list_2 - = process_presence_absence - (&DECL_PRESENCE (decl)->names - [DECL_PRESENCE (decl)->first_list_length], - DECL_PRESENCE (decl)->names_num - - DECL_PRESENCE (decl)->first_list_length, - decl->pos, 1); - add_presence_absence (unit_set_el_list, unit_set_el_list_2, - decl->pos, 1); + = process_presence_absence_names + (DECL_PRESENCE (decl)->names, DECL_PRESENCE (decl)->names_num, + decl->pos, TRUE, DECL_PRESENCE (decl)->final_p); + pattern_set_el_list + = process_presence_absence_patterns + (DECL_PRESENCE (decl)->patterns, + DECL_PRESENCE (decl)->patterns_num, + decl->pos, TRUE, DECL_PRESENCE (decl)->final_p); + add_presence_absence (unit_set_el_list, pattern_set_el_list, + decl->pos, TRUE, + DECL_PRESENCE (decl)->final_p); } } @@ -2889,21 +3084,20 @@ process_decls () if (decl->mode == dm_absence) { unit_set_el_t unit_set_el_list; - unit_set_el_t unit_set_el_list_2; + pattern_set_el_t pattern_set_el_list; unit_set_el_list - = process_presence_absence - (DECL_ABSENCE (decl)->names, - DECL_ABSENCE (decl)->first_list_length, decl->pos, 0); - unit_set_el_list_2 - = process_presence_absence - (&DECL_ABSENCE (decl)->names - [DECL_ABSENCE (decl)->first_list_length], - DECL_ABSENCE (decl)->names_num - - DECL_ABSENCE (decl)->first_list_length, - decl->pos, 0); - add_presence_absence (unit_set_el_list, 
unit_set_el_list_2, - decl->pos, 0); + = process_presence_absence_names + (DECL_ABSENCE (decl)->names, DECL_ABSENCE (decl)->names_num, + decl->pos, FALSE, DECL_ABSENCE (decl)->final_p); + pattern_set_el_list + = process_presence_absence_patterns + (DECL_ABSENCE (decl)->patterns, + DECL_ABSENCE (decl)->patterns_num, + decl->pos, FALSE, DECL_ABSENCE (decl)->final_p); + add_presence_absence (unit_set_el_list, pattern_set_el_list, + decl->pos, FALSE, + DECL_ABSENCE (decl)->final_p); } } } @@ -3141,72 +3335,94 @@ check_loops_in_regexps () } /* The function recursively processes IR of reservation and defines - max and min cycle for reservation of unit and for result in the - reservation. */ -static int -process_regexp_cycles (regexp, start_cycle) + max and min cycle for reservation of unit. */ +static void +process_regexp_cycles (regexp, max_start_cycle, min_start_cycle, + max_finish_cycle, min_finish_cycle) regexp_t regexp; - int start_cycle; + int max_start_cycle, min_start_cycle; + int *max_finish_cycle, *min_finish_cycle; { int i; if (regexp->mode == rm_unit) { - if (REGEXP_UNIT (regexp)->unit_decl->max_occ_cycle_num < start_cycle) - REGEXP_UNIT (regexp)->unit_decl->max_occ_cycle_num = start_cycle; - return start_cycle; + if (REGEXP_UNIT (regexp)->unit_decl->max_occ_cycle_num < max_start_cycle) + REGEXP_UNIT (regexp)->unit_decl->max_occ_cycle_num = max_start_cycle; + if (REGEXP_UNIT (regexp)->unit_decl->min_occ_cycle_num > min_start_cycle + || REGEXP_UNIT (regexp)->unit_decl->min_occ_cycle_num == -1) + REGEXP_UNIT (regexp)->unit_decl->min_occ_cycle_num = min_start_cycle; + *max_finish_cycle = max_start_cycle; + *min_finish_cycle = min_start_cycle; } else if (regexp->mode == rm_reserv) - return process_regexp_cycles (REGEXP_RESERV (regexp)->reserv_decl->regexp, - start_cycle); + process_regexp_cycles (REGEXP_RESERV (regexp)->reserv_decl->regexp, + max_start_cycle, min_start_cycle, + max_finish_cycle, min_finish_cycle); else if (regexp->mode == rm_repeat) { for (i = 0; 
i < REGEXP_REPEAT (regexp)->repeat_num; i++) - start_cycle = process_regexp_cycles (REGEXP_REPEAT (regexp)->regexp, - start_cycle) + 1; - return start_cycle; + { + process_regexp_cycles (REGEXP_REPEAT (regexp)->regexp, + max_start_cycle, min_start_cycle, + max_finish_cycle, min_finish_cycle); + max_start_cycle = *max_finish_cycle + 1; + min_start_cycle = *min_finish_cycle + 1; + } } else if (regexp->mode == rm_sequence) { for (i = 0; i <REGEXP_SEQUENCE (regexp)->regexps_num; i++) - start_cycle - = process_regexp_cycles (REGEXP_SEQUENCE (regexp)->regexps [i], - start_cycle) + 1; - return start_cycle; + { + process_regexp_cycles (REGEXP_SEQUENCE (regexp)->regexps [i], + max_start_cycle, min_start_cycle, + max_finish_cycle, min_finish_cycle); + max_start_cycle = *max_finish_cycle + 1; + min_start_cycle = *min_finish_cycle + 1; + } } else if (regexp->mode == rm_allof) { - int finish_cycle = 0; - int cycle; + int max_cycle = 0; + int min_cycle = 0; for (i = 0; i < REGEXP_ALLOF (regexp)->regexps_num; i++) { - cycle = process_regexp_cycles (REGEXP_ALLOF (regexp)->regexps [i], - start_cycle); - if (finish_cycle < cycle) - finish_cycle = cycle; + process_regexp_cycles (REGEXP_ALLOF (regexp)->regexps [i], + max_start_cycle, min_start_cycle, + max_finish_cycle, min_finish_cycle); + if (max_cycle < *max_finish_cycle) + max_cycle = *max_finish_cycle; + if (i == 0 || min_cycle > *min_finish_cycle) + min_cycle = *min_finish_cycle; } - return finish_cycle; + *max_finish_cycle = max_cycle; + *min_finish_cycle = min_cycle; } else if (regexp->mode == rm_oneof) { - int finish_cycle = 0; - int cycle; + int max_cycle = 0; + int min_cycle = 0; for (i = 0; i < REGEXP_ONEOF (regexp)->regexps_num; i++) { - cycle = process_regexp_cycles (REGEXP_ONEOF (regexp)->regexps [i], - start_cycle); - if (finish_cycle < cycle) - finish_cycle = cycle; + process_regexp_cycles (REGEXP_ONEOF (regexp)->regexps [i], + max_start_cycle, min_start_cycle, + max_finish_cycle, min_finish_cycle); + if (max_cycle < 
*max_finish_cycle) + max_cycle = *max_finish_cycle; + if (i == 0 || min_cycle > *min_finish_cycle) + min_cycle = *min_finish_cycle; } - return finish_cycle; + *max_finish_cycle = max_cycle; + *min_finish_cycle = min_cycle; } else { if (regexp->mode != rm_nothing) abort (); - return start_cycle; + *max_finish_cycle = max_start_cycle; + *min_finish_cycle = min_start_cycle; } } @@ -3216,6 +3432,7 @@ static void evaluate_max_reserv_cycles () { int max_insn_cycles_num; + int min_insn_cycles_num; decl_t decl; int i; @@ -3225,8 +3442,8 @@ evaluate_max_reserv_cycles () decl = description->decls [i]; if (decl->mode == dm_insn_reserv) { - max_insn_cycles_num - = process_regexp_cycles (DECL_INSN_RESERV (decl)->regexp, 0); + process_regexp_cycles (DECL_INSN_RESERV (decl)->regexp, 0, 0, + &max_insn_cycles_num, &min_insn_cycles_num); if (description->max_insn_reserv_cycles < max_insn_cycles_num) description->max_insn_reserv_cycles = max_insn_cycles_num; } @@ -3553,6 +3770,9 @@ finish_alt_states () #define SET_BIT(bitstring, bitno) \ (((char *) (bitstring)) [(bitno) / CHAR_BIT] |= 1 << (bitno) % CHAR_BIT) +#define CLEAR_BIT(bitstring, bitno) \ + (((char *) (bitstring)) [(bitno) / CHAR_BIT] &= ~(1 << (bitno) % CHAR_BIT)) + /* Test if bit number bitno in the bitstring is set. The macro is not side effect proof. */ #define TEST_BIT(bitstring, bitno) \ @@ -3583,6 +3803,9 @@ static vla_ptr_t units_container; /* The start address of the array. */ static unit_decl_t *units_array; +/* Temporary reservation of maximal length. */ +static reserv_sets_t temp_reserv; + /* The state table itself is represented by the following variable. 
*/ static htab_t state_table; @@ -3743,7 +3966,6 @@ reserv_sets_are_intersected (operand_1, operand_2) set_el_t *el_ptr_2; set_el_t *cycle_ptr_1; set_el_t *cycle_ptr_2; - int nonzero_p; if (operand_1 == NULL || operand_2 == NULL) abort (); @@ -3752,6 +3974,7 @@ reserv_sets_are_intersected (operand_1, operand_2) el_ptr_1++, el_ptr_2++) if (*el_ptr_1 & *el_ptr_2) return 1; + reserv_sets_or (temp_reserv, operand_1, operand_2); for (cycle_ptr_1 = operand_1, cycle_ptr_2 = operand_2; cycle_ptr_1 < operand_1 + els_in_reservs; cycle_ptr_1 += els_in_cycle_reserv, cycle_ptr_2 += els_in_cycle_reserv) @@ -3761,25 +3984,17 @@ reserv_sets_are_intersected (operand_1, operand_2) el_ptr_1++, el_ptr_2++) if (*el_ptr_1 & *el_ptr_2) return 1; - nonzero_p = 0; - for (el_ptr_1 = cycle_ptr_1, - el_ptr_2 = get_presence_absence_set (cycle_ptr_2, 1); - el_ptr_1 < cycle_ptr_1 + els_in_cycle_reserv; - el_ptr_1++, el_ptr_2++) - if (*el_ptr_1 & *el_ptr_2) - break; - else if (*el_ptr_2 != 0) - nonzero_p = 1; - if (nonzero_p && el_ptr_1 >= cycle_ptr_1 + els_in_cycle_reserv) + if (!check_presence_pattern_sets (cycle_ptr_1, cycle_ptr_2, FALSE)) + return 1; + if (!check_presence_pattern_sets (temp_reserv + (cycle_ptr_2 + - operand_2), + cycle_ptr_2, TRUE)) + return 1; + if (!check_absence_pattern_sets (cycle_ptr_1, cycle_ptr_2, FALSE)) + return 1; + if (!check_absence_pattern_sets (temp_reserv + (cycle_ptr_2 - operand_2), + cycle_ptr_2, TRUE)) return 1; - for (el_ptr_1 = cycle_ptr_1, - el_ptr_2 = get_presence_absence_set (cycle_ptr_2, 0); - el_ptr_1 < cycle_ptr_1 + els_in_cycle_reserv; - el_ptr_1++, el_ptr_2++) - /* It looks like code for exclusion but exclusion set is - made as symmetric relation preliminary. 
*/ - if (*el_ptr_1 & *el_ptr_2) - return 1; } return 0; } @@ -3876,7 +4091,7 @@ output_cycle_reservs (f, reservs, start_cycle, repetition_num) fprintf (f, NOTHING_NAME); if (repetition_num <= 0) abort (); - if (reserved_units_num > 1) + if (repetition_num != 1 && reserved_units_num > 1) fprintf (f, ")"); if (repetition_num != 1) fprintf (f, "*%d", repetition_num); @@ -4076,12 +4291,13 @@ intersected_state_reservs_p (state1, state2) } /* Return deterministic state (inserted into the table) which - representing the automaton state whic is union of reservations of - deterministic states. */ + representing the automaton state which is union of reservations of + the deterministic states masked by RESERVS. */ static state_t -states_union (state1, state2) +states_union (state1, state2, reservs) state_t state1; state_t state2; + reserv_sets_t reservs; { state_t result; state_t state_in_table; @@ -4090,6 +4306,7 @@ states_union (state1, state2) abort (); result = get_free_state (1, state1->automaton); reserv_sets_or (result->reservs, state1->reservs, state2->reservs); + reserv_sets_and (result->reservs, result->reservs, reservs); state_in_table = insert_state (result); if (result != state_in_table) { @@ -4101,16 +4318,18 @@ states_union (state1, state2) /* Return deterministic state (inserted into the table) which represent the automaton state is obtained from deterministic STATE - by advancing cpu cycle. */ + by advancing cpu cycle and masking by RESERVS. 
*/ static state_t -state_shift (state) +state_shift (state, reservs) state_t state; + reserv_sets_t reservs; { state_t result; state_t state_in_table; result = get_free_state (1, state->automaton); reserv_sets_shift (result->reservs, state->reservs); + reserv_sets_and (result->reservs, result->reservs, reservs); state_in_table = insert_state (result); if (result != state_in_table) { @@ -4146,7 +4365,7 @@ initiate_states () initiate_alt_states (); VLA_PTR_CREATE (free_states, 1500, "free states"); state_table = htab_create (1500, state_hash, state_eq_p, (htab_del) 0); - alloc_empty_reserv_sets (); + temp_reserv = alloc_empty_reserv_sets (); } /* Finishing work with the abstract data. */ @@ -4488,7 +4707,10 @@ initiate_excl_sets () for (el = DECL_UNIT (decl)->excl_list; el != NULL; el = el->next_unit_set_el) - SET_BIT (unit_excl_set, el->unit_decl->unit_num); + { + SET_BIT (unit_excl_set, el->unit_decl->unit_num); + el->unit_decl->in_set_p = TRUE; + } unit_excl_set_table [DECL_UNIT (decl)->unit_num] = unit_excl_set; } } @@ -4527,39 +4749,65 @@ get_excl_set (in_set) -/* The page contains abstract data for work with presence/absence sets - (see presence_set/absence_set in file rtl.def). */ +/* The page contains abstract data for work with presence/absence + pattern sets (see presence_set/absence_set in file rtl.def). */ -/* The following variables refer to correspondingly a presence and an - absence set returned by get_presence_absence_set. This is bit - string of length equal to cpu units number. */ -static reserv_sets_t presence_set, absence_set; +/* The following arrays contain correspondingly presence, final + presence, absence, and final absence patterns for each unit. 
*/ +static pattern_reserv_t *unit_presence_set_table; +static pattern_reserv_t *unit_final_presence_set_table; +static pattern_reserv_t *unit_absence_set_table; +static pattern_reserv_t *unit_final_absence_set_table; + +/* The following function forms list of reservation sets for given + PATTERN_LIST. */ +static pattern_reserv_t +form_reserv_sets_list (pattern_list) + pattern_set_el_t pattern_list; +{ + pattern_set_el_t el; + pattern_reserv_t first, curr, prev; + int i; -/* The following arrays contain correspondingly presence and absence - sets for each unit. */ -static reserv_sets_t *unit_presence_set_table, *unit_absence_set_table; + prev = first = NULL; + for (el = pattern_list; el != NULL; el = el->next_pattern_set_el) + { + curr = create_node (sizeof (struct pattern_reserv)); + curr->reserv = alloc_empty_reserv_sets (); + curr->next_pattern_reserv = NULL; + for (i = 0; i < el->units_num; i++) + { + SET_BIT (curr->reserv, el->unit_decls [i]->unit_num); + el->unit_decls [i]->in_set_p = TRUE; + } + if (prev != NULL) + prev->next_pattern_reserv = curr; + else + first = curr; + prev = curr; + } + return first; +} -/* The following function forms the array containing presence and - absence sets for each unit */ + /* The following function forms the array containing presence and + absence pattern sets for each unit. 
*/ static void -initiate_presence_absence_sets () +initiate_presence_absence_pattern_sets () { decl_t decl; - reserv_sets_t unit_set; - unit_set_el_t el; int i; - obstack_blank (&irp, els_in_cycle_reserv * sizeof (set_el_t)); - presence_set = (reserv_sets_t) obstack_base (&irp); + obstack_blank (&irp, description->units_num * sizeof (pattern_reserv_t)); + unit_presence_set_table = (pattern_reserv_t *) obstack_base (&irp); obstack_finish (&irp); - obstack_blank (&irp, description->units_num * sizeof (reserv_sets_t)); - unit_presence_set_table = (reserv_sets_t *) obstack_base (&irp); + obstack_blank (&irp, description->units_num * sizeof (pattern_reserv_t)); + unit_final_presence_set_table = (pattern_reserv_t *) obstack_base (&irp); obstack_finish (&irp); - obstack_blank (&irp, els_in_cycle_reserv * sizeof (set_el_t)); - absence_set = (reserv_sets_t) obstack_base (&irp); + obstack_blank (&irp, description->units_num * sizeof (pattern_reserv_t)); + unit_absence_set_table = (pattern_reserv_t *) obstack_base (&irp); obstack_finish (&irp); - obstack_blank (&irp, description->units_num * sizeof (reserv_sets_t)); - unit_absence_set_table = (reserv_sets_t *) obstack_base (&irp); + obstack_blank (&irp, description->units_num * sizeof (pattern_reserv_t)); + unit_final_absence_set_table = (pattern_reserv_t *) obstack_base (&irp); obstack_finish (&irp); /* Evaluate unit presence/absence sets. 
*/ for (i = 0; i < description->decls_num; i++) @@ -4567,65 +4815,107 @@ initiate_presence_absence_sets () decl = description->decls [i]; if (decl->mode == dm_unit) { - obstack_blank (&irp, els_in_cycle_reserv * sizeof (set_el_t)); - unit_set = (reserv_sets_t) obstack_base (&irp); - obstack_finish (&irp); - memset (unit_set, 0, els_in_cycle_reserv * sizeof (set_el_t)); - for (el = DECL_UNIT (decl)->presence_list; - el != NULL; - el = el->next_unit_set_el) - SET_BIT (unit_set, el->unit_decl->unit_num); - unit_presence_set_table [DECL_UNIT (decl)->unit_num] = unit_set; - - obstack_blank (&irp, els_in_cycle_reserv * sizeof (set_el_t)); - unit_set = (reserv_sets_t) obstack_base (&irp); - obstack_finish (&irp); - memset (unit_set, 0, els_in_cycle_reserv * sizeof (set_el_t)); - for (el = DECL_UNIT (decl)->absence_list; - el != NULL; - el = el->next_unit_set_el) - SET_BIT (unit_set, el->unit_decl->unit_num); - unit_absence_set_table [DECL_UNIT (decl)->unit_num] = unit_set; + unit_presence_set_table [DECL_UNIT (decl)->unit_num] + = form_reserv_sets_list (DECL_UNIT (decl)->presence_list); + unit_final_presence_set_table [DECL_UNIT (decl)->unit_num] + = form_reserv_sets_list (DECL_UNIT (decl)->final_presence_list); + unit_absence_set_table [DECL_UNIT (decl)->unit_num] + = form_reserv_sets_list (DECL_UNIT (decl)->absence_list); + unit_final_absence_set_table [DECL_UNIT (decl)->unit_num] + = form_reserv_sets_list (DECL_UNIT (decl)->final_absence_list); } } } -/* The function sets up and return PRESENCE_SET (if PRESENCE_P) or - ABSENCE_SET which is union of corresponding sets for each unit in - IN_SET. */ -static reserv_sets_t -get_presence_absence_set (in_set, presence_p) - reserv_sets_t in_set; - int presence_p; +/* The function checks that CHECKED_SET satisfies all presence pattern + sets for units in ORIGIONAL_SET. The function returns TRUE if it + is ok. 
*/ +static int +check_presence_pattern_sets (checked_set, origional_set, final_p) + reserv_sets_t checked_set, origional_set; + int final_p; { int char_num; int chars_num; int i; int start_unit_num; int unit_num; + int presence_p; + pattern_reserv_t pat_reserv; + + chars_num = els_in_cycle_reserv * sizeof (set_el_t); + for (char_num = 0; char_num < chars_num; char_num++) + if (((unsigned char *) origional_set) [char_num]) + for (i = CHAR_BIT - 1; i >= 0; i--) + if ((((unsigned char *) origional_set) [char_num] >> i) & 1) + { + start_unit_num = char_num * CHAR_BIT + i; + if (start_unit_num >= description->units_num) + break; + if ((final_p + && unit_final_presence_set_table [start_unit_num] == NULL) + || (!final_p + && unit_presence_set_table [start_unit_num] == NULL)) + continue; + presence_p = FALSE; + for (pat_reserv = (final_p + ? unit_final_presence_set_table [start_unit_num] + : unit_presence_set_table [start_unit_num]); + pat_reserv != NULL; + pat_reserv = pat_reserv->next_pattern_reserv) + { + for (unit_num = 0; unit_num < els_in_cycle_reserv; unit_num++) + if ((checked_set [unit_num] & pat_reserv->reserv [unit_num]) + != pat_reserv->reserv [unit_num]) + break; + presence_p = presence_p || unit_num >= els_in_cycle_reserv; + } + if (!presence_p) + return FALSE; + } + return TRUE; +} +/* The function checks that CHECKED_SET satisfies all absence pattern + sets for units in ORIGIONAL_SET. The function returns TRUE if it + is ok. 
*/ +static int +check_absence_pattern_sets (checked_set, origional_set, final_p) + reserv_sets_t checked_set, origional_set; + int final_p; +{ + int char_num; + int chars_num; + int i; + int start_unit_num; + int unit_num; + pattern_reserv_t pat_reserv; + chars_num = els_in_cycle_reserv * sizeof (set_el_t); - if (presence_p) - memset (presence_set, 0, chars_num); - else - memset (absence_set, 0, chars_num); for (char_num = 0; char_num < chars_num; char_num++) - if (((unsigned char *) in_set) [char_num]) + if (((unsigned char *) origional_set) [char_num]) for (i = CHAR_BIT - 1; i >= 0; i--) - if ((((unsigned char *) in_set) [char_num] >> i) & 1) + if ((((unsigned char *) origional_set) [char_num] >> i) & 1) { start_unit_num = char_num * CHAR_BIT + i; if (start_unit_num >= description->units_num) - return (presence_p ? presence_set : absence_set); - for (unit_num = 0; unit_num < els_in_cycle_reserv; unit_num++) - if (presence_p) - presence_set [unit_num] - |= unit_presence_set_table [start_unit_num] [unit_num]; - else - absence_set [unit_num] - |= unit_absence_set_table [start_unit_num] [unit_num]; + break; + for (pat_reserv = (final_p + ? unit_final_absence_set_table [start_unit_num] + : unit_absence_set_table [start_unit_num]); + pat_reserv != NULL; + pat_reserv = pat_reserv->next_pattern_reserv) + { + for (unit_num = 0; unit_num < els_in_cycle_reserv; unit_num++) + if ((checked_set [unit_num] & pat_reserv->reserv [unit_num]) + != pat_reserv->reserv [unit_num] + && pat_reserv->reserv [unit_num]) + break; + if (unit_num >= els_in_cycle_reserv) + return FALSE; + } } - return (presence_p ? 
presence_set : absence_set); + return TRUE; } @@ -4932,10 +5222,13 @@ transform_3 (regexp) } else if (regexp->mode == rm_allof) { - regexp_t oneof = NULL, seq; - int oneof_index = 0, max_seq_length, allof_length; + regexp_t oneof = NULL; + regexp_t seq; + int oneof_index = 0; + int max_seq_length, allof_length; regexp_t result; - regexp_t allof = NULL, allof_op = NULL; + regexp_t allof = NULL; + regexp_t allof_op = NULL; int i, j; for (i = 0; i < REGEXP_ALLOF (regexp)->regexps_num; i++) @@ -4982,19 +5275,18 @@ transform_3 (regexp) max_seq_length = 0; if (regexp->mode == rm_allof) for (i = 0; i < REGEXP_ALLOF (regexp)->regexps_num; i++) - { - if (REGEXP_ALLOF (regexp)->regexps [i]->mode == rm_sequence) - { - seq = REGEXP_ALLOF (regexp)->regexps [i]; - if (max_seq_length < REGEXP_SEQUENCE (seq)->regexps_num) - max_seq_length = REGEXP_SEQUENCE (seq)->regexps_num; - } - else if (REGEXP_ALLOF (regexp)->regexps [i]->mode != rm_unit) - { - max_seq_length = 0; - break; - } - } + if (REGEXP_ALLOF (regexp)->regexps [i]->mode == rm_sequence) + { + seq = REGEXP_ALLOF (regexp)->regexps [i]; + if (max_seq_length < REGEXP_SEQUENCE (seq)->regexps_num) + max_seq_length = REGEXP_SEQUENCE (seq)->regexps_num; + } + else if (REGEXP_ALLOF (regexp)->regexps [i]->mode != rm_unit + && REGEXP_ALLOF (regexp)->regexps [i]->mode != rm_nothing) + { + max_seq_length = 0; + break; + } if (max_seq_length != 0) { if (max_seq_length == 1 || REGEXP_ALLOF (regexp)->regexps_num <= 1) @@ -5019,7 +5311,9 @@ transform_3 (regexp) } else if (i == 0 && (REGEXP_ALLOF (regexp)->regexps [j]->mode - == rm_unit)) + == rm_unit + || (REGEXP_ALLOF (regexp)->regexps [j]->mode + == rm_nothing))) { allof_op = REGEXP_ALLOF (regexp)->regexps [j]; allof_length++; @@ -5051,7 +5345,9 @@ transform_3 (regexp) } else if (i == 0 && (REGEXP_ALLOF (regexp)->regexps [j]->mode - == rm_unit)) + == rm_unit + || (REGEXP_ALLOF (regexp)->regexps [j]->mode + == rm_nothing))) { allof_op = REGEXP_ALLOF (regexp)->regexps [j]; REGEXP_ALLOF 
(allof)->regexps [allof_length] @@ -5140,24 +5436,25 @@ transform_insn_regexps () -/* The following variable is an array indexed by cycle. Each element - contains cyclic list of units which should be in the same cycle. */ -static unit_decl_t *the_same_automaton_lists; +/* The following variable value is TRUE if the first annotated message + about units to automata distribution has been output. */ +static int annotation_message_reported_p; /* The function processes all alternative reservations on CYCLE in - given REGEXP to check the UNIT is not reserved on the all - alternatives. If it is true, the unit should be in the same - automaton with other analogous units reserved on CYCLE in given - REGEXP. */ + given REGEXP of insn reservation with INSN_RESERV_NAME to check the + UNIT (or another unit from the same automaton) is not reserved on + the all alternatives. If it is true, the function outputs message + about the rule violation. */ static void -process_unit_to_form_the_same_automaton_unit_lists (unit, regexp, cycle) +check_unit_distribution_in_reserv (insn_reserv_name, unit, regexp, cycle) + const char *insn_reserv_name; regexp_t unit; regexp_t regexp; int cycle; { int i, k; regexp_t seq, allof; - unit_decl_t unit_decl, last; + unit_decl_t unit_decl; if (regexp == NULL || regexp->mode != rm_oneof) abort (); @@ -5168,64 +5465,60 @@ process_unit_to_form_the_same_automaton_unit_lists (unit, regexp, cycle) if (seq->mode == rm_sequence) { if (cycle >= REGEXP_SEQUENCE (seq)->regexps_num) - break; + continue; allof = REGEXP_SEQUENCE (seq)->regexps [cycle]; if (allof->mode == rm_allof) { for (k = 0; k < REGEXP_ALLOF (allof)->regexps_num; k++) if (REGEXP_ALLOF (allof)->regexps [k]->mode == rm_unit && (REGEXP_UNIT (REGEXP_ALLOF (allof)->regexps [k]) - ->unit_decl == unit_decl)) + ->unit_decl->automaton_decl + == unit_decl->automaton_decl)) break; if (k >= REGEXP_ALLOF (allof)->regexps_num) break; } else if (allof->mode == rm_unit - && REGEXP_UNIT (allof)->unit_decl != 
unit_decl) + && (REGEXP_UNIT (allof)->unit_decl->automaton_decl + != unit_decl->automaton_decl)) break; } else if (cycle != 0) - break; + continue; else if (seq->mode == rm_allof) { for (k = 0; k < REGEXP_ALLOF (seq)->regexps_num; k++) if (REGEXP_ALLOF (seq)->regexps [k]->mode == rm_unit - && (REGEXP_UNIT (REGEXP_ALLOF (seq)->regexps [k])->unit_decl - == unit_decl)) + && (REGEXP_UNIT (REGEXP_ALLOF (seq)->regexps [k]) + ->unit_decl->automaton_decl == unit_decl->automaton_decl)) break; if (k >= REGEXP_ALLOF (seq)->regexps_num) break; } else if (seq->mode == rm_unit - && REGEXP_UNIT (seq)->unit_decl != unit_decl) + && (REGEXP_UNIT (seq)->unit_decl->automaton_decl + != unit_decl->automaton_decl)) break; } if (i >= 0) { - if (the_same_automaton_lists [cycle] == NULL) - the_same_automaton_lists [cycle] = unit_decl; - else + if (!annotation_message_reported_p) { - for (last = the_same_automaton_lists [cycle];;) - { - if (last == unit_decl) - return; - if (last->the_same_automaton_unit - == the_same_automaton_lists [cycle]) - break; - last = last->the_same_automaton_unit; - } - last->the_same_automaton_unit = unit_decl->the_same_automaton_unit; - unit_decl->the_same_automaton_unit - = the_same_automaton_lists [cycle]; + fprintf (stderr, "\n"); + error ("The following units do not satisfy units-automata distribution rule"); + error (" (A unit of given unit automaton should be on each reserv. altern.)"); + annotation_message_reported_p = TRUE; } + error ("Unit %s, reserv. %s, cycle %d", + unit_decl->name, insn_reserv_name, cycle); } } -/* The function processes given REGEXP to find units which should be - in the same automaton. */ +/* The function processes given REGEXP to find units with the wrong + distribution. 
*/ static void -form_the_same_automaton_unit_lists_from_regexp (regexp) +check_regexp_units_distribution (insn_reserv_name, regexp) + const char *insn_reserv_name; regexp_t regexp; { int i, j, k; @@ -5233,8 +5526,6 @@ form_the_same_automaton_unit_lists_from_regexp (regexp) if (regexp == NULL || regexp->mode != rm_oneof) return; - for (i = 0; i < description->max_insn_reserv_cycles; i++) - the_same_automaton_lists [i] = NULL; for (i = REGEXP_ONEOF (regexp)->regexps_num - 1; i >= 0; i--) { seq = REGEXP_ONEOF (regexp)->regexps [i]; @@ -5247,14 +5538,14 @@ form_the_same_automaton_unit_lists_from_regexp (regexp) { unit = REGEXP_ALLOF (allof)->regexps [k]; if (unit->mode == rm_unit) - process_unit_to_form_the_same_automaton_unit_lists - (unit, regexp, j); + check_unit_distribution_in_reserv (insn_reserv_name, unit, + regexp, j); else if (unit->mode != rm_nothing) abort (); } else if (allof->mode == rm_unit) - process_unit_to_form_the_same_automaton_unit_lists - (allof, regexp, j); + check_unit_distribution_in_reserv (insn_reserv_name, allof, + regexp, j); else if (allof->mode != rm_nothing) abort (); } @@ -5263,78 +5554,37 @@ form_the_same_automaton_unit_lists_from_regexp (regexp) { unit = REGEXP_ALLOF (seq)->regexps [k]; if (unit->mode == rm_unit) - process_unit_to_form_the_same_automaton_unit_lists - (unit, regexp, 0); + check_unit_distribution_in_reserv (insn_reserv_name, unit, + regexp, 0); else if (unit->mode != rm_nothing) abort (); } else if (seq->mode == rm_unit) - process_unit_to_form_the_same_automaton_unit_lists (seq, regexp, 0); + check_unit_distribution_in_reserv (insn_reserv_name, seq, regexp, 0); else if (seq->mode != rm_nothing) abort (); } } -/* The function initializes data to search for units which should be - in the same automaton and call function - `form_the_same_automaton_unit_lists_from_regexp' for each insn - reservation regexp. 
*/ -static void -form_the_same_automaton_unit_lists () -{ - decl_t decl; - int i; - - the_same_automaton_lists - = (unit_decl_t *) xmalloc (description->max_insn_reserv_cycles - * sizeof (unit_decl_t)); - for (i = 0; i < description->decls_num; i++) - { - decl = description->decls [i]; - if (decl->mode == dm_unit) - { - DECL_UNIT (decl)->the_same_automaton_message_reported_p = FALSE; - DECL_UNIT (decl)->the_same_automaton_unit = DECL_UNIT (decl); - } - } - for (i = 0; i < description->decls_num; i++) - { - decl = description->decls [i]; - if (decl->mode == dm_insn_reserv) - form_the_same_automaton_unit_lists_from_regexp - (DECL_INSN_RESERV (decl)->transformed_regexp); - } - free (the_same_automaton_lists); -} - -/* The function finds units which should be in the same automaton and, - if they are not, reports about it. */ +/* The function finds units which violates units to automata + distribution rule. If the units exist, report about them. */ static void check_unit_distributions_to_automata () { decl_t decl; - unit_decl_t start_unit_decl, unit_decl; int i; - form_the_same_automaton_unit_lists (); + fprintf (stderr, "Check unit distributions to automata..."); + annotation_message_reported_p = FALSE; for (i = 0; i < description->decls_num; i++) { decl = description->decls [i]; - if (decl->mode == dm_unit) - { - start_unit_decl = DECL_UNIT (decl); - if (!start_unit_decl->the_same_automaton_message_reported_p) - for (unit_decl = start_unit_decl->the_same_automaton_unit; - unit_decl != start_unit_decl; - unit_decl = unit_decl->the_same_automaton_unit) - if (start_unit_decl->automaton_decl != unit_decl->automaton_decl) - { - error ("Units `%s' and `%s' should be in the same automaton", - start_unit_decl->name, unit_decl->name); - unit_decl->the_same_automaton_message_reported_p = TRUE; - } - } + if (decl->mode == dm_insn_reserv) + check_regexp_units_distribution + (DECL_INSN_RESERV (decl)->name, + DECL_INSN_RESERV (decl)->transformed_regexp); } + fprintf (stderr, 
"done\n"); } @@ -5537,6 +5787,36 @@ form_ainsn_with_same_reservs (automaton) VLA_PTR_DELETE (last_insns); } +/* Forming unit reservations which can affect creating the automaton + states achieved from a given state. It permits to build smaller + automata in many cases. We would have the same automata after + the minimization without such optimization, but the automaton + right after the building could be huge. So in other words, usage + of reservs_matter means some minimization during building the + automaton. */ +static reserv_sets_t +form_reservs_matter (automaton) + automaton_t automaton; +{ + int cycle, unit; + reserv_sets_t reservs_matter = alloc_empty_reserv_sets(); + + for (cycle = 0; cycle < max_cycles_num; cycle++) + for (unit = 0; unit < description->units_num; unit++) + if (units_array [unit]->automaton_decl + == automaton->corresponding_automaton_decl + && (cycle >= units_array [unit]->min_occ_cycle_num + /* We can not remove queried unit from reservations. */ + || units_array [unit]->query_p + /* We can not remove units which are used + `exclusion_set', `presence_set', + `final_presence_set', `absence_set', and + `final_absence_set'. */ + || units_array [unit]->in_set_p)) + set_unit_reserv (reservs_matter, cycle, unit); + return reservs_matter; +} + /* The following function creates all states of nondeterministic (if NDFA_FLAG has nonzero value) or deterministic AUTOMATON. */ static void @@ -5552,6 +5832,8 @@ make_automaton (automaton) ainsn_t advance_cycle_ainsn; arc_t added_arc; vla_ptr_t state_stack; + int states_n; + reserv_sets_t reservs_matter = form_reservs_matter (automaton); VLA_PTR_CREATE (state_stack, 150, "state stack"); /* Create the start state (empty state). 
*/ @@ -5559,6 +5841,7 @@ make_automaton (automaton) automaton->start_state = start_state; start_state->it_was_placed_in_stack_for_NDFA_forming = 1; VLA_PTR_ADD (state_stack, start_state); + states_n = 1; while (VLA_PTR_LENGTH (state_stack) != 0) { state = VLA_PTR (state_stack, VLA_PTR_LENGTH (state_stack) - 1); @@ -5582,12 +5865,15 @@ make_automaton (automaton) state2 = alt_state->state; if (!intersected_state_reservs_p (state, state2)) { - state2 = states_union (state, state2); + state2 = states_union (state, state2, reservs_matter); if (!state2->it_was_placed_in_stack_for_NDFA_forming) { state2->it_was_placed_in_stack_for_NDFA_forming = 1; VLA_PTR_ADD (state_stack, state2); + states_n++; + if (states_n % 100 == 0) + fprintf (stderr, "*"); } added_arc = add_arc (state, state2, ainsn, 1); if (!ndfa_flag) @@ -5611,11 +5897,14 @@ make_automaton (automaton) advance_cycle_ainsn = ainsn; } /* Add transition to advance cycle. */ - state2 = state_shift (state); + state2 = state_shift (state, reservs_matter); if (!state2->it_was_placed_in_stack_for_NDFA_forming) { state2->it_was_placed_in_stack_for_NDFA_forming = 1; VLA_PTR_ADD (state_stack, state2); + states_n++; + if (states_n % 100 == 0) + fprintf (stderr, "*"); } if (advance_cycle_ainsn == NULL) abort (); @@ -5652,15 +5941,15 @@ form_arcs_marked_by_insn (state) /* The function creates composed state (see comments for IR) from ORIGINAL_STATE and list of arcs ARCS_MARKED_BY_INSN marked by the same insn. If the composed state is not in STATE_STACK yet, it is - popped to STATE_STACK. */ -static void + pushed into STATE_STACK. 
*/ +static int create_composed_state (original_state, arcs_marked_by_insn, state_stack) state_t original_state; arc_t arcs_marked_by_insn; vla_ptr_t *state_stack; { state_t state; - alt_state_t curr_alt_state; + alt_state_t alt_state, curr_alt_state; alt_state_t new_alt_state; arc_t curr_arc; arc_t next_arc; @@ -5668,9 +5957,10 @@ create_composed_state (original_state, arcs_marked_by_insn, state_stack) state_t temp_state; alt_state_t canonical_alt_states_list; int alts_number; + int new_state_p = 0; if (arcs_marked_by_insn == NULL) - return; + return new_state_p; if (arcs_marked_by_insn->next_arc_marked_by_insn == NULL) state = arcs_marked_by_insn->to_state; else @@ -5683,14 +5973,25 @@ create_composed_state (original_state, arcs_marked_by_insn, state_stack) for (curr_arc = arcs_marked_by_insn; curr_arc != NULL; curr_arc = curr_arc->next_arc_marked_by_insn) - { - new_alt_state = get_free_alt_state (); - new_alt_state->next_alt_state = curr_alt_state; - new_alt_state->state = curr_arc->to_state; - if (curr_arc->to_state->component_states != NULL) - abort (); - curr_alt_state = new_alt_state; - } + if (curr_arc->to_state->component_states == NULL) + { + new_alt_state = get_free_alt_state (); + new_alt_state->next_alt_state = curr_alt_state; + new_alt_state->state = curr_arc->to_state; + curr_alt_state = new_alt_state; + } + else + for (alt_state = curr_arc->to_state->component_states; + alt_state != NULL; + alt_state = alt_state->next_sorted_alt_state) + { + new_alt_state = get_free_alt_state (); + new_alt_state->next_alt_state = curr_alt_state; + new_alt_state->state = alt_state->state; + if (alt_state->state->component_states != NULL) + abort (); + curr_alt_state = new_alt_state; + } /* There are not identical sets in the alt state list. 
*/ canonical_alt_states_list = uniq_sort_alt_states (curr_alt_state); if (canonical_alt_states_list->next_sorted_alt_state == NULL) @@ -5714,6 +6015,7 @@ create_composed_state (original_state, arcs_marked_by_insn, state_stack) { if (state->it_was_placed_in_stack_for_DFA_forming) abort (); + new_state_p = 1; for (curr_alt_state = state->component_states; curr_alt_state != NULL; curr_alt_state = curr_alt_state->next_sorted_alt_state) @@ -5740,6 +6042,7 @@ create_composed_state (original_state, arcs_marked_by_insn, state_stack) state->it_was_placed_in_stack_for_DFA_forming = 1; VLA_PTR_ADD (*state_stack, state); } + return new_state_p; } /* The function transforms nondeterministic AUTOMATON into @@ -5753,12 +6056,14 @@ NDFA_to_DFA (automaton) decl_t decl; vla_ptr_t state_stack; int i; + int states_n; VLA_PTR_CREATE (state_stack, 150, "state stack"); /* Create the start state (empty state). */ start_state = automaton->start_state; start_state->it_was_placed_in_stack_for_DFA_forming = 1; VLA_PTR_ADD (state_stack, start_state); + states_n = 1; while (VLA_PTR_LENGTH (state_stack) != 0) { state = VLA_PTR (state_stack, VLA_PTR_LENGTH (state_stack) - 1); @@ -5767,10 +6072,15 @@ NDFA_to_DFA (automaton) for (i = 0; i < description->decls_num; i++) { decl = description->decls [i]; - if (decl->mode == dm_insn_reserv) - create_composed_state - (state, DECL_INSN_RESERV (decl)->arcs_marked_by_insn, - &state_stack); + if (decl->mode == dm_insn_reserv + && create_composed_state + (state, DECL_INSN_RESERV (decl)->arcs_marked_by_insn, + &state_stack)) + { + states_n++; + if (states_n % 100 == 0) + fprintf (stderr, "*"); + } } } VLA_PTR_DELETE (state_stack); @@ -5892,19 +6202,39 @@ copy_equiv_class (to, from) VLA_PTR_ADD (*to, *class_ptr); } +/* The following function returns TRUE if STATE reserves the unit with + UNIT_NUM on the first cycle. 
*/ +static int +first_cycle_unit_presence (state, unit_num) + state_t state; + int unit_num; +{ + int presence_p; + + if (state->component_states == NULL) + presence_p = test_unit_reserv (state->reservs, 0, unit_num); + else + presence_p + = test_unit_reserv (state->component_states->state->reservs, + 0, unit_num); + return presence_p; +} + /* The function returns nonzero value if STATE is not equivalent to - another state from the same current partition on equivalence - classes Another state has ORIGINAL_STATE_OUT_ARCS_NUM number of + ANOTHER_STATE from the same current partition on equivalence + classes. Another state has ANOTHER_STATE_OUT_ARCS_NUM number of output arcs. Iteration of making equivalence partition is defined by ODD_ITERATION_FLAG. */ static int -state_is_differed (state, original_state_out_arcs_num, odd_iteration_flag) - state_t state; - int original_state_out_arcs_num; +state_is_differed (state, another_state, another_state_out_arcs_num, + odd_iteration_flag) + state_t state, another_state; + int another_state_out_arcs_num; int odd_iteration_flag; { arc_t arc; int state_out_arcs_num; + int i, presence1_p, presence2_p; state_out_arcs_num = 0; for (arc = first_out_arc (state); arc != NULL; arc = next_out_arc (arc)) @@ -5917,7 +6247,19 @@ state_is_differed (state, original_state_out_arcs_num, odd_iteration_flag) || (arc->insn->insn_reserv_decl->state_alts != arc->state_alts)) return 1; } - return state_out_arcs_num != original_state_out_arcs_num; + if (state_out_arcs_num != another_state_out_arcs_num) + return 1; + /* Now we are looking at the states with the point of view of query + units. 
*/ + for (i = 0; i < description->units_num; i++) + if (units_array [i]->query_p) + { + presence1_p = first_cycle_unit_presence (state, i); + presence2_p = first_cycle_unit_presence (another_state, i); + if ((presence1_p && !presence2_p) || (!presence1_p && presence2_p)) + return 1; + } + return 0; } /* The function makes initial partition of STATES on equivalent @@ -5982,7 +6324,7 @@ partition_equiv_class (equiv_class_ptr, odd_iteration_flag, curr_state = next_state) { next_state = curr_state->next_equiv_class_state; - if (state_is_differed (curr_state, out_arcs_num, + if (state_is_differed (curr_state, first_state, out_arcs_num, odd_iteration_flag)) { /* Remove curr state from the class equivalence. */ @@ -6068,7 +6410,7 @@ merge_states (automaton, equiv_classes) state_t new_state; state_t first_class_state; alt_state_t alt_states; - alt_state_t new_alt_state; + alt_state_t alt_state, new_alt_state; arc_t curr_arc; arc_t next_arc; @@ -6089,12 +6431,27 @@ merge_states (automaton, equiv_classes) curr_state = curr_state->next_equiv_class_state) { curr_state->equiv_class_state = new_state; - new_alt_state = get_free_alt_state (); - new_alt_state->state = curr_state; - new_alt_state->next_sorted_alt_state = alt_states; - alt_states = new_alt_state; + if (curr_state->component_states == NULL) + { + new_alt_state = get_free_alt_state (); + new_alt_state->state = curr_state; + new_alt_state->next_alt_state = alt_states; + alt_states = new_alt_state; + } + else + for (alt_state = curr_state->component_states; + alt_state != NULL; + alt_state = alt_state->next_sorted_alt_state) + { + new_alt_state = get_free_alt_state (); + new_alt_state->state = alt_state->state; + new_alt_state->next_alt_state = alt_states; + alt_states = new_alt_state; + } } - new_state->component_states = alt_states; + /* Its is important that alt states were sorted before and + after merging to have the same quering results. 
*/ + new_state->component_states = uniq_sort_alt_states (alt_states); } else (*equiv_class_ptr)->equiv_class_state = *equiv_class_ptr; @@ -6212,13 +6569,25 @@ build_automaton (automaton) int arcs_num; ticker_on (&NDFA_time); + if (automaton->corresponding_automaton_decl == NULL) + fprintf (stderr, "Create anonymous automaton (1 star is 100 new states):"); + else + fprintf (stderr, "Create automaton `%s' (1 star is 100 new states):", + automaton->corresponding_automaton_decl->name); make_automaton (automaton); + fprintf (stderr, " done\n"); ticker_off (&NDFA_time); count_states_and_arcs (automaton, &states_num, &arcs_num); automaton->NDFA_states_num = states_num; automaton->NDFA_arcs_num = arcs_num; ticker_on (&NDFA_to_DFA_time); + if (automaton->corresponding_automaton_decl == NULL) + fprintf (stderr, "Make anonymous DFA (1 star is 100 new states):"); + else + fprintf (stderr, "Make DFA `%s' (1 star is 100 new states):", + automaton->corresponding_automaton_decl->name); NDFA_to_DFA (automaton); + fprintf (stderr, " done\n"); ticker_off (&NDFA_to_DFA_time); count_states_and_arcs (automaton, &states_num, &arcs_num); automaton->DFA_states_num = states_num; @@ -6226,7 +6595,13 @@ build_automaton (automaton) if (!no_minimization_flag) { ticker_on (&minimize_time); + if (automaton->corresponding_automaton_decl == NULL) + fprintf (stderr, "Minimize anonymous DFA..."); + else + fprintf (stderr, "Minimize DFA `%s'...", + automaton->corresponding_automaton_decl->name); minimize_DFA (automaton); + fprintf (stderr, "done\n"); ticker_off (&minimize_time); count_states_and_arcs (automaton, &states_num, &arcs_num); automaton->minimal_DFA_states_num = states_num; @@ -6440,7 +6815,8 @@ estimate_one_automaton_bound () decl = description->decls [i]; if (decl->mode == dm_unit) { - root_value = exp (log (DECL_UNIT (decl)->max_occ_cycle_num + 1.0) + root_value = exp (log (DECL_UNIT (decl)->max_occ_cycle_num + - DECL_UNIT (decl)->min_occ_cycle_num + 1.0) / automata_num); if 
(MAX_FLOATING_POINT_VALUE_FOR_AUTOMATON_BOUND / root_value > one_automaton_estimation_bound) @@ -6659,18 +7035,18 @@ create_automata () curr_automaton = curr_automaton->next_automaton) { if (curr_automaton->corresponding_automaton_decl == NULL) - fprintf (stderr, "Create anonymous automaton ..."); + fprintf (stderr, "Prepare anonymous automaton creation ... "); else - fprintf (stderr, "Create automaton `%s'...", + fprintf (stderr, "Prepare automaton `%s' creation...", curr_automaton->corresponding_automaton_decl->name); create_alt_states (curr_automaton); form_ainsn_with_same_reservs (curr_automaton); + fprintf (stderr, "done\n"); build_automaton (curr_automaton); enumerate_states (curr_automaton); ticker_on (&equiv_time); set_insn_equiv_classes (curr_automaton); ticker_off (&equiv_time); - fprintf (stderr, "done\n"); } } @@ -7164,6 +7540,8 @@ output_reserved_units_table_name (f, automaton) #define CPU_UNIT_RESERVATION_P_FUNC_NAME "cpu_unit_reservation_p" +#define DFA_CLEAN_INSN_CACHE_FUNC_NAME "dfa_clean_insn_cache" + #define DFA_START_FUNC_NAME "dfa_start" #define DFA_FINISH_FUNC_NAME "dfa_finish" @@ -7658,7 +8036,6 @@ output_state_alts_table (automaton) value for an ainsn and state. */ static int curr_state_pass_num; - /* This recursive function passes states to find minimal issue delay value for AINSN. The state being visited is STATE. The function returns minimal issue delay value for AINSN in STATE or -1 if we @@ -7773,7 +8150,7 @@ output_min_issue_delay_table (automaton) + ainsn->insn_equiv_class_num) = min_delay; } } - fprintf (output_file, "/* Vector of min issue delay of insns.*/\n"); + fprintf (output_file, "/* Vector of min issue delay of insns. 
*/\n"); fprintf (output_file, "static const "); output_range_type (output_file, 0, automaton->max_min_delay); fprintf (output_file, " "); @@ -7894,14 +8271,12 @@ output_reserved_units_table (automaton) curr_state_ptr++) { for (i = 0; i < description->units_num; i++) - if (units_array [i]->query_p) - { - if (test_unit_reserv ((*curr_state_ptr)->reservs, 0, i)) - VLA_HWINT (reserved_units_table, - (*curr_state_ptr)->order_state_num * state_byte_size - + units_array [i]->query_num / 8) - += (1 << (units_array [i]->query_num % 8)); - } + if (units_array [i]->query_p + && first_cycle_unit_presence (*curr_state_ptr, i)) + VLA_HWINT (reserved_units_table, + (*curr_state_ptr)->order_state_num * state_byte_size + + units_array [i]->query_num / 8) + += (1 << (units_array [i]->query_num % 8)); } fprintf (output_file, "/* Vector for reserved units of states. */\n"); fprintf (output_file, "static const "); @@ -7939,13 +8314,10 @@ output_tables () AUTOMATON_STATE_ALTS_MACRO_NAME); output_min_issue_delay_table (automaton); output_dead_lock_vect (automaton); - if (no_minimization_flag) - { - fprintf (output_file, "\n#if %s\n\n", CPU_UNITS_QUERY_MACRO_NAME); - output_reserved_units_table (automaton); - fprintf (output_file, "\n#endif /* #if %s */\n\n", - CPU_UNITS_QUERY_MACRO_NAME); - } + fprintf (output_file, "\n#if %s\n\n", CPU_UNITS_QUERY_MACRO_NAME); + output_reserved_units_table (automaton); + fprintf (output_file, "\n#endif /* #if %s */\n\n", + CPU_UNITS_QUERY_MACRO_NAME); } fprintf (output_file, "\n#define %s %d\n\n", ADVANCE_CYCLE_VALUE_NAME, DECL_INSN_RESERV (advance_cycle_insn_decl)->insn_num); @@ -8743,16 +9115,13 @@ output_cpu_unit_reservation_p () fprintf (output_file, " return 0;\n}\n\n"); } -/* The function outputs PHR interface function `dfa_start'. */ +/* The function outputs PHR interface function `dfa_clean_insn_cache'. 
*/ static void -output_dfa_start_func () +output_dfa_clean_insn_cache_func () { fprintf (output_file, - "void\n%s ()\n{\n int %s;\n\n %s = get_max_uid ();\n", - DFA_START_FUNC_NAME, I_VARIABLE_NAME, - DFA_INSN_CODES_LENGTH_VARIABLE_NAME); - fprintf (output_file, " %s = (int *) xmalloc (%s * sizeof (int));\n", - DFA_INSN_CODES_VARIABLE_NAME, DFA_INSN_CODES_LENGTH_VARIABLE_NAME); + "void\n%s ()\n{\n int %s;\n\n", + DFA_CLEAN_INSN_CACHE_FUNC_NAME, I_VARIABLE_NAME); fprintf (output_file, " for (%s = 0; %s < %s; %s++)\n %s [%s] = -1;\n}\n\n", I_VARIABLE_NAME, I_VARIABLE_NAME, @@ -8760,6 +9129,18 @@ output_dfa_start_func () DFA_INSN_CODES_VARIABLE_NAME, I_VARIABLE_NAME); } +/* The function outputs PHR interface function `dfa_start'. */ +static void +output_dfa_start_func () +{ + fprintf (output_file, + "void\n%s ()\n{\n %s = get_max_uid ();\n", + DFA_START_FUNC_NAME, DFA_INSN_CODES_LENGTH_VARIABLE_NAME); + fprintf (output_file, " %s = (int *) xmalloc (%s * sizeof (int));\n", + DFA_INSN_CODES_VARIABLE_NAME, DFA_INSN_CODES_LENGTH_VARIABLE_NAME); + fprintf (output_file, " %s ();\n}\n\n", DFA_CLEAN_INSN_CACHE_FUNC_NAME); +} + /* The function outputs PHR interface function `dfa_finish'. */ static void output_dfa_finish_func () @@ -8792,11 +9173,29 @@ output_unit_set_el_list (list) for (el = list; el != NULL; el = el->next_unit_set_el) { if (el != list) - fprintf (output_description_file, ","); + fprintf (output_description_file, ", "); fprintf (output_description_file, "%s", el->unit_decl->name); } } +/* Output patterns in LIST separated by comma. */ +static void +output_pattern_set_el_list (list) + pattern_set_el_t list; +{ + pattern_set_el_t el; + int i; + + for (el = list; el != NULL; el = el->next_pattern_set_el) + { + if (el != list) + fprintf (output_description_file, ", "); + for (i = 0; i < el->units_num; i++) + fprintf (output_description_file, (i == 0 ? 
"%s" : " %s"), + el->unit_decls [i]->name); + } +} + /* The function outputs string representation of IR define_reservation and define_insn_reservation. */ static void @@ -8821,16 +9220,32 @@ output_description () { fprintf (output_description_file, "unit %s presence_set: ", DECL_UNIT (decl)->name); - output_unit_set_el_list (DECL_UNIT (decl)->presence_list); + output_pattern_set_el_list (DECL_UNIT (decl)->presence_list); fprintf (output_description_file, "\n"); } + if (DECL_UNIT (decl)->final_presence_list != NULL) + { + fprintf (output_description_file, "unit %s final_presence_set: ", + DECL_UNIT (decl)->name); + output_pattern_set_el_list + (DECL_UNIT (decl)->final_presence_list); + fprintf (output_description_file, "\n"); + } if (DECL_UNIT (decl)->absence_list != NULL) { fprintf (output_description_file, "unit %s absence_set: ", DECL_UNIT (decl)->name); - output_unit_set_el_list (DECL_UNIT (decl)->absence_list); + output_pattern_set_el_list (DECL_UNIT (decl)->absence_list); fprintf (output_description_file, "\n"); } + if (DECL_UNIT (decl)->final_absence_list != NULL) + { + fprintf (output_description_file, "unit %s final_absence_set: ", + DECL_UNIT (decl)->name); + output_pattern_set_el_list + (DECL_UNIT (decl)->final_absence_list); + fprintf (output_description_file, "\n"); + } } } fprintf (output_description_file, "\n"); @@ -9092,6 +9507,7 @@ output_statistics (f) FILE *f; { automaton_t automaton; + int states_num; #ifndef NDEBUG int transition_comb_vect_els = 0; int transition_full_vect_els = 0; @@ -9110,10 +9526,14 @@ output_statistics (f) automaton->NDFA_states_num, automaton->NDFA_arcs_num); fprintf (f, " %5d DFA states, %5d DFA arcs\n", automaton->DFA_states_num, automaton->DFA_arcs_num); + states_num = automaton->DFA_states_num; if (!no_minimization_flag) - fprintf (f, " %5d minimal DFA states, %5d minimal DFA arcs\n", - automaton->minimal_DFA_states_num, - automaton->minimal_DFA_arcs_num); + { + fprintf (f, " %5d minimal DFA states, %5d minimal DFA 
arcs\n", + automaton->minimal_DFA_states_num, + automaton->minimal_DFA_arcs_num); + states_num = automaton->minimal_DFA_states_num; + } fprintf (f, " %5d all insns %5d insn equivalence classes\n", description->insns_num, automaton->insn_equiv_classes_num); #ifndef NDEBUG @@ -9131,7 +9551,7 @@ output_statistics (f) ? "use comb vect" : "use simple vect")); fprintf (f, "%5ld min delay table els, compression factor %d\n", - (long) automaton->DFA_states_num * automaton->insn_equiv_classes_num, + (long) states_num * automaton->insn_equiv_classes_num, automaton->min_issue_delay_table_compression_factor); transition_comb_vect_els += VLA_HWINT_LENGTH (automaton->trans_table->comb_vect); @@ -9142,7 +9562,7 @@ output_statistics (f) state_alts_full_vect_els += VLA_HWINT_LENGTH (automaton->state_alts_table->full_vect); min_issue_delay_vect_els - += automaton->DFA_states_num * automaton->insn_equiv_classes_num; + += states_num * automaton->insn_equiv_classes_num; #endif } #ifndef NDEBUG @@ -9200,7 +9620,7 @@ generate () initiate_automata_lists (); initiate_pass_states (); initiate_excl_sets (); - initiate_presence_absence_sets (); + initiate_presence_absence_pattern_sets (); automaton_generation_time = create_ticker (); create_automata (); ticker_off (&automaton_generation_time); @@ -9680,14 +10100,13 @@ write_automata () output_internal_insn_latency_func (); output_insn_latency_func (); output_print_reservation_func (); - if (no_minimization_flag) - { - fprintf (output_file, "\n#if %s\n\n", CPU_UNITS_QUERY_MACRO_NAME); - output_get_cpu_unit_code_func (); - output_cpu_unit_reservation_p (); - fprintf (output_file, "\n#endif /* #if %s */\n\n", - CPU_UNITS_QUERY_MACRO_NAME); - } + /* Output function get_cpu_unit_code. 
*/ + fprintf (output_file, "\n#if %s\n\n", CPU_UNITS_QUERY_MACRO_NAME); + output_get_cpu_unit_code_func (); + output_cpu_unit_reservation_p (); + fprintf (output_file, "\n#endif /* #if %s */\n\n", + CPU_UNITS_QUERY_MACRO_NAME); + output_dfa_clean_insn_cache_func (); output_dfa_start_func (); output_dfa_finish_func (); fprintf (stderr, "done\n"); diff --git a/gcc/haifa-sched.c b/gcc/haifa-sched.c index 5a0c208b219..046abc34423 100644 --- a/gcc/haifa-sched.c +++ b/gcc/haifa-sched.c @@ -320,6 +320,7 @@ static int rank_for_schedule PARAMS ((const PTR, const PTR)); static void swap_sort PARAMS ((rtx *, int)); static void queue_insn PARAMS ((rtx, int)); static void schedule_insn PARAMS ((rtx, struct ready_list *, int)); +static int find_set_reg_weight PARAMS ((rtx)); static void find_insn_reg_weight PARAMS ((int)); static void adjust_priority PARAMS ((rtx)); static void advance_one_cycle PARAMS ((void)); @@ -366,7 +367,7 @@ static rtx move_insn PARAMS ((rtx, rtx)); on the first cycle. It is used only for DFA based scheduler. */ static rtx ready_element PARAMS ((struct ready_list *, int)); static rtx ready_remove PARAMS ((struct ready_list *, int)); -static int max_issue PARAMS ((struct ready_list *, state_t, int *)); +static int max_issue PARAMS ((struct ready_list *, int *)); static rtx choose_ready PARAMS ((struct ready_list *)); @@ -853,6 +854,7 @@ rank_for_schedule (x, y) /* Prefer insn with higher priority. */ priority_val = INSN_PRIORITY (tmp2) - INSN_PRIORITY (tmp); + if (priority_val) return priority_val; @@ -1017,8 +1019,10 @@ ready_element (ready, index) struct ready_list *ready; int index; { +#ifdef ENABLE_CHECKING if (ready->n_ready == 0 || index >= ready->n_ready) abort (); +#endif return ready->vec[ready->first - index]; } @@ -1195,11 +1199,12 @@ schedule_insn (insn, ready, clock) to issue on the same cycle as the previous insn. A machine may use this information to decide how the instruction should be aligned. 
*/ - if (reload_completed && issue_rate > 1 + if (issue_rate > 1 && GET_CODE (PATTERN (insn)) != USE && GET_CODE (PATTERN (insn)) != CLOBBER) { - PUT_MODE (insn, clock > last_clock_var ? TImode : VOIDmode); + if (reload_completed) + PUT_MODE (insn, clock > last_clock_var ? TImode : VOIDmode); last_clock_var = clock; } } @@ -1536,6 +1541,32 @@ rm_other_notes (head, tail) /* Functions for computation of registers live/usage info. */ +/* This function looks for a new register being defined. + If the destination register is already used by the source, + a new register is not needed. */ + +static int +find_set_reg_weight (x) + rtx x; +{ + if (GET_CODE (x) == CLOBBER + && register_operand (SET_DEST (x), VOIDmode)) + return 1; + if (GET_CODE (x) == SET + && register_operand (SET_DEST (x), VOIDmode)) + { + if (GET_CODE (SET_DEST (x)) == REG) + { + if (!reg_mentioned_p (SET_DEST (x), SET_SRC (x))) + return 1; + else + return 0; + } + return 1; + } + return 0; +} + /* Calculate INSN_REG_WEIGHT for all insns of a block. */ static void @@ -1558,21 +1589,16 @@ find_insn_reg_weight (b) /* Increment weight for each register born here. */ x = PATTERN (insn); - if ((GET_CODE (x) == SET || GET_CODE (x) == CLOBBER) - && register_operand (SET_DEST (x), VOIDmode)) - reg_weight++; - else if (GET_CODE (x) == PARALLEL) - { - int j; - for (j = XVECLEN (x, 0) - 1; j >= 0; j--) - { - x = XVECEXP (PATTERN (insn), 0, j); - if ((GET_CODE (x) == SET || GET_CODE (x) == CLOBBER) - && register_operand (SET_DEST (x), VOIDmode)) - reg_weight++; - } - } - + reg_weight += find_set_reg_weight (x); + if (GET_CODE (x) == PARALLEL) + { + int j; + for (j = XVECLEN (x, 0) - 1; j >= 0; j--) + { + x = XVECEXP (PATTERN (insn), 0, j); + reg_weight += find_set_reg_weight (x); + } + } /* Decrement weight for each register that dies here. 
*/ for (x = REG_NOTES (insn); x; x = XEXP (x, 1)) { @@ -1743,25 +1769,6 @@ move_insn (insn, last) { rtx retval = NULL; - /* If INSN has SCHED_GROUP_P set, then issue it and any other - insns with SCHED_GROUP_P set first. */ - while (SCHED_GROUP_P (insn)) - { - rtx prev = PREV_INSN (insn); - - /* Move a SCHED_GROUP_P insn. */ - move_insn1 (insn, last); - /* If this is the first call to reemit_notes, then record - its return value. */ - if (retval == NULL_RTX) - retval = reemit_notes (insn, insn); - else - reemit_notes (insn, insn); - /* Consume SCHED_GROUP_P flag. */ - SCHED_GROUP_P (insn) = 0; - insn = prev; - } - /* Now move the first non SCHED_GROUP_P insn. */ move_insn1 (insn, last); @@ -1772,90 +1779,109 @@ move_insn (insn, last) else reemit_notes (insn, insn); + SCHED_GROUP_P (insn) = 0; + return retval; } +/* The following structure describe an entry of the stack of choices. */ +struct choice_entry +{ + /* Ordinal number of the issued insn in the ready queue. */ + int index; + /* The number of the rest insns whose issues we should try. */ + int rest; + /* The number of issued essential insns. */ + int n; + /* State after issuing the insn. */ + state_t state; +}; + +/* The following array is used to implement a stack of choices used in + function max_issue. */ +static struct choice_entry *choice_stack; + +/* The following variable value is number of essential insns issued on + the current cycle. An insn is essential one if it changes the + processors state. */ +static int cycle_issued_insns; + /* The following function returns maximal (or close to maximal) number of insns which can be issued on the same cycle and one of which - insns is insns with the best rank (the last insn in READY). To + insns is insns with the best rank (the first insn in READY). To make this function tries different samples of ready insns. READY is current queue `ready'. Global array READY_TRY reflects what - insns are already issued in this try. STATE is current processor - state. 
If the function returns nonzero, INDEX will contain index + insns are already issued in this try. INDEX will contain index of the best insn in READY. The following function is used only for first cycle multipass scheduling. */ - static int -max_issue (ready, state, index) - struct ready_list *ready; - state_t state; - int *index; +max_issue (ready, index) + struct ready_list *ready; + int *index; { - int i, best, n, temp_index, delay; - state_t temp_state; + int n, i, all, n_ready, lookahead, best, delay; + struct choice_entry *top; rtx insn; - int max_lookahead = (*targetm.sched.first_cycle_multipass_dfa_lookahead) (); - if (state_dead_lock_p (state)) - return 0; - - temp_state = alloca (dfa_state_size); + lookahead = (*targetm.sched.first_cycle_multipass_dfa_lookahead) (); best = 0; - - for (i = 0; i < ready->n_ready; i++) + memcpy (choice_stack->state, curr_state, dfa_state_size); + top = choice_stack; + top->rest = lookahead; + top->n = 0; + n_ready = ready->n_ready; + for (all = i = 0; i < n_ready; i++) if (!ready_try [i]) - { - insn = ready_element (ready, i); - - if (INSN_CODE (insn) < 0) - continue; - - memcpy (temp_state, state, dfa_state_size); - - delay = state_transition (temp_state, insn); - - if (delay == 0) - { - if (!targetm.sched.dfa_bubble) - continue; - else - { - int j; - rtx bubble; - - for (j = 0; - (bubble = (*targetm.sched.dfa_bubble) (j)) != NULL_RTX; - j++) - if (state_transition (temp_state, bubble) < 0 - && state_transition (temp_state, insn) < 0) - break; - - if (bubble == NULL_RTX) - continue; - } - } - else if (delay > 0) - continue; - - --max_lookahead; - - if (max_lookahead < 0) - break; - - ready_try [i] = 1; - - n = max_issue (ready, temp_state, &temp_index); - if (n > 0 || ready_try[0]) - n += 1; - - if (best < n) - { - best = n; - *index = i; - } - ready_try [i] = 0; - } - + all++; + i = 0; + for (;;) + { + if (top->rest == 0 || i >= n_ready) + { + if (top == choice_stack) + break; + if (best < top - choice_stack && ready_try 
[0]) + { + best = top - choice_stack; + *index = choice_stack [1].index; + if (top->n == issue_rate - cycle_issued_insns || best == all) + break; + } + i = top->index; + ready_try [i] = 0; + top--; + memcpy (curr_state, top->state, dfa_state_size); + } + else if (!ready_try [i]) + { + insn = ready_element (ready, i); + delay = state_transition (curr_state, insn); + if (delay < 0) + { + if (state_dead_lock_p (curr_state)) + top->rest = 0; + else + top->rest--; + n = top->n; + if (memcmp (top->state, curr_state, dfa_state_size) != 0) + n++; + top++; + top->rest = lookahead; + top->index = i; + top->n = n; + memcpy (top->state, curr_state, dfa_state_size); + ready_try [i] = 1; + i = -1; + } + } + i++; + } + while (top != choice_stack) + { + ready_try [top->index] = 0; + top--; + } + memcpy (curr_state, choice_stack->state, dfa_state_size); return best; } @@ -1873,9 +1899,21 @@ choose_ready (ready) else { /* Try to choose the better insn. */ - int index; + int index, i; + rtx insn; - if (max_issue (ready, curr_state, &index) == 0) + insn = ready_element (ready, 0); + if (INSN_CODE (insn) < 0) + return ready_remove_first (ready); + for (i = 1; i < ready->n_ready; i++) + { + insn = ready_element (ready, i); + ready_try [i] + = (INSN_CODE (insn) < 0 + || (targetm.sched.first_cycle_multipass_dfa_lookahead_guard + && !(*targetm.sched.first_cycle_multipass_dfa_lookahead_guard) (insn))); + } + if (max_issue (ready, &index) == 0) return ready_remove_first (ready); else return ready_remove (ready, index); @@ -1903,9 +1941,10 @@ schedule_block (b, rgn_n_insns) int rgn_n_insns; { struct ready_list ready; - int first_cycle_insn_p; + int i, first_cycle_insn_p; int can_issue_more; state_t temp_state = NULL; /* It is used for multipass scheduling. */ + int sort_p; /* Head/tail info for this block. 
*/ rtx prev_head = current_sched_info->prev_head; @@ -1957,6 +1996,11 @@ schedule_block (b, rgn_n_insns) temp_state = alloca (dfa_state_size); ready_try = (char *) xmalloc ((rgn_n_insns + 1) * sizeof (char)); memset (ready_try, 0, (rgn_n_insns + 1) * sizeof (char)); + choice_stack + = (struct choice_entry *) xmalloc ((rgn_n_insns + 1) + * sizeof (struct choice_entry)); + for (i = 0; i <= rgn_n_insns; i++) + choice_stack[i].state = (state_t) xmalloc (dfa_state_size); } (*current_sched_info->init_ready_list) (&ready); @@ -1985,6 +2029,7 @@ schedule_block (b, rgn_n_insns) /* Start just before the beginning of time. */ clock_var = -1; + sort_p = TRUE; /* Loop until all the insns in BB are scheduled. */ while ((*current_sched_info->schedule_more_p) ()) { @@ -2007,8 +2052,17 @@ schedule_block (b, rgn_n_insns) debug_ready_list (&ready); } - /* Sort the ready list based on priority. */ - ready_sort (&ready); + if (sort_p) + { + /* Sort the ready list based on priority. */ + ready_sort (&ready); + + if (sched_verbose >= 2) + { + fprintf (sched_dump, ";;\t\tReady list after ready_sort: "); + debug_ready_list (&ready); + } + } /* Allow the target to reorder the list, typically for better instruction bundling. */ @@ -2021,6 +2075,7 @@ schedule_block (b, rgn_n_insns) can_issue_more = issue_rate; first_cycle_insn_p = 1; + cycle_issued_insns = 0; for (;;) { rtx insn; @@ -2050,8 +2105,21 @@ schedule_block (b, rgn_n_insns) break; /* Select and remove the insn from the ready list. 
*/ - insn = choose_ready (&ready); + if (sort_p) + insn = choose_ready (&ready); + else + insn = ready_remove_first (&ready); + if (targetm.sched.dfa_new_cycle + && (*targetm.sched.dfa_new_cycle) (sched_dump, sched_verbose, + insn, last_clock_var, + clock_var, &sort_p)) + { + ready_add (&ready, insn); + break; + } + + sort_p = TRUE; memcpy (temp_state, curr_state, dfa_state_size); if (recog_memoized (insn) < 0) { @@ -2155,7 +2223,11 @@ schedule_block (b, rgn_n_insns) if (targetm.sched.use_dfa_pipeline_interface && (*targetm.sched.use_dfa_pipeline_interface) ()) - memcpy (curr_state, temp_state, dfa_state_size); + { + if (memcmp (curr_state, temp_state, dfa_state_size) != 0) + cycle_issued_insns++; + memcpy (curr_state, temp_state, dfa_state_size); + } if (targetm.sched.variable_issue) can_issue_more = @@ -2172,13 +2244,16 @@ schedule_block (b, rgn_n_insns) next: first_cycle_insn_p = 0; + /* Sort the ready list based on priority. This must be + redone here, as schedule_insn may have readied additional + insns that will not be sorted correctly. */ + if (ready.n_ready > 0) + ready_sort (&ready); + if (targetm.sched.reorder2) { - /* Sort the ready list based on priority. */ - if (ready.n_ready > 0) - ready_sort (&ready); can_issue_more = - (*targetm.sched.reorder2) (sched_dump,sched_verbose, + (*targetm.sched.reorder2) (sched_dump, sched_verbose, ready.n_ready ? ready_lastpos (&ready) : NULL, &ready.n_ready, clock_var); @@ -2214,6 +2289,27 @@ schedule_block (b, rgn_n_insns) head = NEXT_INSN (prev_head); tail = last_scheduled_insn; + if (!reload_completed) + { + rtx insn, link, next; + + /* INSN_TICK (minimum clock tick at which the insn becomes + ready) may be not correct for the insn in the subsequent + blocks of the region. We should use a correct value of + `clock_var' or modify INSN_TICK. It is better to keep + clock_var value equal to 0 at the start of a basic block. + Therefore we modify INSN_TICK here. 
*/ + for (insn = head; insn != tail; insn = NEXT_INSN (insn)) + if (INSN_P (insn)) + { + for (link = INSN_DEPEND (insn); link != 0; link = XEXP (link, 1)) + { + next = XEXP (link, 0); + INSN_TICK (next) -= clock_var; + } + } + } + /* Restore-other-notes: NOTE_LIST is the end of a chain of notes previously found among the insns. Insert them at the beginning of the insns. */ @@ -2250,7 +2346,12 @@ schedule_block (b, rgn_n_insns) if (targetm.sched.use_dfa_pipeline_interface && (*targetm.sched.use_dfa_pipeline_interface) ()) - free (ready_try); + { + free (ready_try); + for (i = 0; i <= rgn_n_insns; i++) + free (choice_stack [i].state); + free (choice_stack); + } } /* Set_priorities: compute priority of each insn in the block. */ @@ -2275,8 +2376,7 @@ set_priorities (head, tail) if (GET_CODE (insn) == NOTE) continue; - if (!(SCHED_GROUP_P (insn))) - n_insn++; + n_insn++; (void) priority (insn); } diff --git a/gcc/rtl.def b/gcc/rtl.def index 63413864742..c1f812bc071 100644 --- a/gcc/rtl.def +++ b/gcc/rtl.def @@ -358,9 +358,8 @@ DEF_RTL_EXPR(ADDRESS, "address", "e", 'm') DEF_RTL_EXPR(DEFINE_CPU_UNIT, "define_cpu_unit", "sS", 'x') /* (define_query_cpu_unit string [string]) describes cpu functional - units analogously to define_cpu_unit. If we use automaton without - minimization, the reservation of such units can be queried for - automaton state. */ + units analogously to define_cpu_unit. The reservation of such + units can be queried for automaton state. */ DEF_RTL_EXPR(DEFINE_QUERY_CPU_UNIT, "define_query_cpu_unit", "sS", 'x') /* (exclusion_set string string) means that each CPU functional unit @@ -370,28 +369,80 @@ DEF_RTL_EXPR(DEFINE_QUERY_CPU_UNIT, "define_query_cpu_unit", "sS", 'x') for description CPU with fully pipelined floating point functional unit which can execute simultaneously only single floating point insns or only double floating point insns. All CPU functional - units in a set should belong the same automaton. 
*/ + units in a set should belong to the same automaton. */ DEF_RTL_EXPR(EXCLUSION_SET, "exclusion_set", "ss", 'x') /* (presence_set string string) means that each CPU functional unit in - the first string can not be reserved unless at least one of units - whose names are in the second string is reserved. This is an - asymmetric relation. CPU units in the string are separated by - commas. For example, it is useful for description that slot1 is - reserved after slot0 reservation for VLIW processor. All CPU - functional units in a set should belong the same automaton. */ + the first string can not be reserved unless at least one of pattern + of units whose names are in the second string is reserved. This is + an asymmetric relation. CPU units or unit patterns in the strings + are separated by commas. Pattern is one unit name or unit names + separated by white-spaces. + + For example, it is useful for description that slot1 is reserved + after slot0 reservation for a VLIW processor. We could describe it + by the following construction + + (presence_set "slot1" "slot0") + + Or slot1 is reserved only after slot0 and unit b0 reservation. In + this case we could write + + (presence_set "slot1" "slot0 b0") + + All CPU functional units in a set should belong to the same + automaton. */ DEF_RTL_EXPR(PRESENCE_SET, "presence_set", "ss", 'x') +/* (final_presence_set string string) is analogous to `presence_set'. + The difference between them is when checking is done. When an + instruction is issued in given automaton state reflecting all + current and planned unit reservations, the automaton state is + changed. The first state is a source state, the second one is a + result state. Checking for `presence_set' is done on the source + state reservation, checking for `final_presence_set' is done on the + result reservation. This construction is useful to describe a + reservation which is actually two subsequent reservations. 
For + example, if we use + + (presence_set "slot1" "slot0") + + the following insn will never be issued (because slot1 requires + slot0 which is absent in the source state). + + (define_reservation "insn_and_nop" "slot0 + slot1") + + but it can be issued if we use the analogous `final_presence_set'. */ +DEF_RTL_EXPR(FINAL_PRESENCE_SET, "final_presence_set", "ss", 'x') + /* (absence_set string string) means that each CPU functional unit in - the first string can not be reserved only if each unit whose name - is in the second string is not reserved. This is an asymmetric - relation (actually exclusion set is analogous to this one but it is - symmetric). CPU units in the string are separated by commas. For - example, it is useful for description that slot0 can not be - reserved after slot1 or slot2 reservation for VLIW processor. All - CPU functional units in a set should belong the same automaton. */ + the first string can be reserved only if each pattern of units + whose names are in the second string is not reserved. This is an + asymmetric relation (actually exclusion set is analogous to this + one but it is symmetric). CPU units or unit patterns in the string + are separated by commas. Pattern is one unit name or unit names + separated by white-spaces. + + For example, it is useful for description that slot0 can not be + reserved after slot1 or slot2 reservation for a VLIW processor. We + could describe it by the following construction + + (absence_set "slot2" "slot0, slot1") + + Or slot2 can not be reserved if slot0 and unit b0 are reserved or + slot1 and unit b1 are reserved. In this case we could write + + (absence_set "slot2" "slot0 b0, slot1 b1") + + All CPU functional units in a set should belong to the same + automaton. */ DEF_RTL_EXPR(ABSENCE_SET, "absence_set", "ss", 'x') +/* (final_absence_set string string) is analogous to `absence_set' but + checking is done on the result (state) reservation. See comments + for `final_presence_set'. 
*/ +DEF_RTL_EXPR(FINAL_ABSENCE_SET, "final_absence_set", "ss", 'x') + /* (define_bypass number out_insn_names in_insn_names) names bypass with given latency (the first number) from insns given by the first string (see define_insn_reservation) into insns given by the second @@ -416,8 +467,8 @@ DEF_RTL_EXPR(DEFINE_AUTOMATON, "define_automaton", "s", 'x') automata. Currently there are the following options: o "no-minimization" which makes no minimization of automata. This - is only worth to do when we are going to query CPU functional - unit reservations in an automaton state. + is only worth to do when we are debugging the description and + need to look more accurately at reservations of states. o "time" which means printing additional time statistics about generation of automata. diff --git a/gcc/sched-deps.c b/gcc/sched-deps.c index ec3df2c1596..a735125495c 100644 --- a/gcc/sched-deps.c +++ b/gcc/sched-deps.c @@ -83,14 +83,12 @@ static sbitmap *forward_dependency_cache; static int deps_may_trap_p PARAMS ((rtx)); static void add_dependence_list PARAMS ((rtx, rtx, enum reg_note)); static void add_dependence_list_and_free PARAMS ((rtx, rtx *, enum reg_note)); -static void remove_dependence PARAMS ((rtx, rtx)); static void set_sched_group_p PARAMS ((rtx)); static void flush_pending_lists PARAMS ((struct deps *, rtx, int, int)); static void sched_analyze_1 PARAMS ((struct deps *, rtx, rtx)); static void sched_analyze_2 PARAMS ((struct deps *, rtx, rtx)); static void sched_analyze_insn PARAMS ((struct deps *, rtx, rtx, rtx)); -static rtx group_leader PARAMS ((rtx)); static rtx get_condition PARAMS ((rtx)); static int conditions_mutex_p PARAMS ((rtx, rtx)); @@ -237,18 +235,16 @@ add_dependence (insn, elem, dep_type) rtx nnext; while ((nnext = next_nonnote_insn (next)) != NULL && INSN_P (nnext) + && next != insn && SCHED_GROUP_P (nnext)) next = nnext; - /* Again, don't depend an insn on itself. 
*/ - if (insn == next) - return; + if (insn != next) + add_dependence (insn, next, REG_DEP_ANTI); - /* Make the dependence to NEXT, the last insn of the group, instead - of the original ELEM. */ - elem = next; } + present_p = 1; #ifdef INSN_SCHEDULING /* ??? No good way to tell from here whether we're doing interblock @@ -384,76 +380,6 @@ add_dependence_list_and_free (insn, listp, dep_type) } } -/* Remove ELEM wrapped in an INSN_LIST from the LOG_LINKS - of INSN. Abort if not found. */ - -static void -remove_dependence (insn, elem) - rtx insn; - rtx elem; -{ - rtx prev, link, next; - int found = 0; - - for (prev = 0, link = LOG_LINKS (insn); link; link = next) - { - next = XEXP (link, 1); - if (XEXP (link, 0) == elem) - { - if (prev) - XEXP (prev, 1) = next; - else - LOG_LINKS (insn) = next; - -#ifdef INSN_SCHEDULING - /* If we are removing a dependency from the LOG_LINKS list, - make sure to remove it from the cache too. */ - if (true_dependency_cache != NULL) - { - if (REG_NOTE_KIND (link) == 0) - RESET_BIT (true_dependency_cache[INSN_LUID (insn)], - INSN_LUID (elem)); - else if (REG_NOTE_KIND (link) == REG_DEP_ANTI) - RESET_BIT (anti_dependency_cache[INSN_LUID (insn)], - INSN_LUID (elem)); - else if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT) - RESET_BIT (output_dependency_cache[INSN_LUID (insn)], - INSN_LUID (elem)); - } -#endif - - free_INSN_LIST_node (link); - - found = 1; - } - else - prev = link; - } - - if (!found) - abort (); - return; -} - -/* Return an insn which represents a SCHED_GROUP, which is - the last insn in the group. */ - -static rtx -group_leader (insn) - rtx insn; -{ - rtx prev; - - do - { - prev = insn; - insn = next_nonnote_insn (insn); - } - while (insn && INSN_P (insn) && SCHED_GROUP_P (insn)); - - return prev; -} - /* Set SCHED_GROUP_P and care for the rest of the bookkeeping that goes along with that. 
*/ @@ -465,21 +391,21 @@ set_sched_group_p (insn) SCHED_GROUP_P (insn) = 1; - /* There may be a note before this insn now, but all notes will - be removed before we actually try to schedule the insns, so - it won't cause a problem later. We must avoid it here though. */ + for (link = LOG_LINKS (insn); link; link = XEXP (link, 1)) + { + prev = insn; + do + { + prev = prev_nonnote_insn (prev); + if (XEXP (link, 0) == prev) + break; + } + while (SCHED_GROUP_P (prev)); + if (XEXP (link, 0) != prev) + add_dependence (prev, XEXP (link, 0), REG_DEP_ANTI); + } prev = prev_nonnote_insn (insn); - - /* Make a copy of all dependencies on the immediately previous insn, - and add to this insn. This is so that all the dependencies will - apply to the group. Remove an explicit dependence on this insn - as SCHED_GROUP_P now represents it. */ - - if (find_insn_list (prev, LOG_LINKS (insn))) - remove_dependence (insn, prev); - - for (link = LOG_LINKS (prev); link; link = XEXP (link, 1)) - add_dependence (insn, XEXP (link, 0), REG_NOTE_KIND (link)); + add_dependence (insn, prev, REG_DEP_ANTI); } /* Process an insn's memory dependencies. There are four kinds of @@ -983,7 +909,15 @@ sched_analyze_insn (deps, x, insn, loop_notes) INIT_REG_SET (&tmp); (*current_sched_info->compute_jump_reg_dependencies) (insn, &tmp); - IOR_REG_SET (reg_pending_uses, &tmp); + /* Make latency of jump equal to 0 by using anti-dependence. */ + EXECUTE_IF_SET_IN_REG_SET (&tmp, 0, i, + { + struct deps_reg *reg_last = &deps->reg_last[i]; + add_dependence_list (insn, reg_last->sets, REG_DEP_ANTI); + add_dependence_list (insn, reg_last->clobbers, REG_DEP_ANTI); + reg_last->uses_length++; + reg_last->uses = alloc_INSN_LIST (insn, reg_last->uses); + }); CLEAR_REG_SET (&tmp); /* All memory writes and volatile reads must happen before the @@ -1049,14 +983,16 @@ sched_analyze_insn (deps, x, insn, loop_notes) /* Add dependencies if a scheduling barrier was found. 
*/ if (reg_pending_barrier) { + /* In the case of a barrier, most of the added dependencies are not + real, so we use anti-dependence here. */ if (GET_CODE (PATTERN (insn)) == COND_EXEC) { EXECUTE_IF_SET_IN_REG_SET (&deps->reg_last_in_use, 0, i, { struct deps_reg *reg_last = &deps->reg_last[i]; add_dependence_list (insn, reg_last->uses, REG_DEP_ANTI); - add_dependence_list (insn, reg_last->sets, 0); - add_dependence_list (insn, reg_last->clobbers, 0); + add_dependence_list (insn, reg_last->sets, REG_DEP_ANTI); + add_dependence_list (insn, reg_last->clobbers, REG_DEP_ANTI); }); } else @@ -1066,8 +1002,10 @@ sched_analyze_insn (deps, x, insn, loop_notes) struct deps_reg *reg_last = &deps->reg_last[i]; add_dependence_list_and_free (insn, &reg_last->uses, REG_DEP_ANTI); - add_dependence_list_and_free (insn, &reg_last->sets, 0); - add_dependence_list_and_free (insn, &reg_last->clobbers, 0); + add_dependence_list_and_free (insn, &reg_last->sets, + REG_DEP_ANTI); + add_dependence_list_and_free (insn, &reg_last->clobbers, + REG_DEP_ANTI); reg_last->uses_length = 0; reg_last->clobbers_length = 0; }); @@ -1432,11 +1370,9 @@ compute_forward_dependences (head, tail) if (! INSN_P (insn)) continue; - insn = group_leader (insn); - for (link = LOG_LINKS (insn); link; link = XEXP (link, 1)) { - rtx x = group_leader (XEXP (link, 0)); + rtx x = XEXP (link, 0); rtx new_link; if (x != XEXP (link, 0)) diff --git a/gcc/sched-ebb.c b/gcc/sched-ebb.c index 3542b5d3714..29189a68d0c 100644 --- a/gcc/sched-ebb.c +++ b/gcc/sched-ebb.c @@ -40,6 +40,7 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA #include "recog.h" #include "cfglayout.h" #include "sched-int.h" +#include "target.h" /* The number of insns to be scheduled in total. */ static int target_n_insns; @@ -89,14 +90,7 @@ init_ready_list (ready) Count number of insns in the target block being scheduled. */ for (insn = NEXT_INSN (prev_head); insn != next_tail; insn = NEXT_INSN (insn)) { - rtx next; - - if (! 
INSN_P (insn)) - continue; - next = NEXT_INSN (insn); - - if (INSN_DEP_COUNT (insn) == 0 - && (! INSN_P (next) || SCHED_GROUP_P (next) == 0)) + if (INSN_DEP_COUNT (insn) == 0) ready_add (ready, insn); if (!(SCHED_GROUP_P (insn))) target_n_insns++; @@ -222,6 +216,9 @@ schedule_ebb (head, tail) /* Compute INSN_DEPEND. */ compute_forward_dependences (head, tail); + if (targetm.sched.dependencies_evaluation_hook) + targetm.sched.dependencies_evaluation_hook (head, tail); + /* Set priorities. */ n_insns = set_priorities (head, tail); diff --git a/gcc/sched-rgn.c b/gcc/sched-rgn.c index 43fdef72cd8..36a53f73c26 100644 --- a/gcc/sched-rgn.c +++ b/gcc/sched-rgn.c @@ -2023,17 +2023,9 @@ init_ready_list (ready) Count number of insns in the target block being scheduled. */ for (insn = NEXT_INSN (prev_head); insn != next_tail; insn = NEXT_INSN (insn)) { - rtx next; - - if (! INSN_P (insn)) - continue; - next = NEXT_INSN (insn); - - if (INSN_DEP_COUNT (insn) == 0 - && (! INSN_P (next) || SCHED_GROUP_P (next) == 0)) + if (INSN_DEP_COUNT (insn) == 0) ready_add (ready, insn); - if (!(SCHED_GROUP_P (insn))) - target_n_insns++; + target_n_insns++; } /* Add to ready list all 'ready' insns in valid source blocks. @@ -2067,19 +2059,8 @@ init_ready_list (ready) insn, insn) <= 3))) && check_live (insn, bb_src) && is_exception_free (insn, bb_src, target_bb)))) - { - rtx next; - - /* Note that we haven't squirreled away the notes for - blocks other than the current. So if this is a - speculative insn, NEXT might otherwise be a note. */ - next = next_nonnote_insn (insn); - if (INSN_DEP_COUNT (insn) == 0 - && (! next - || ! INSN_P (next) - || SCHED_GROUP_P (next) == 0)) - ready_add (ready, insn); - } + if (INSN_DEP_COUNT (insn) == 0) + ready_add (ready, insn); } } } @@ -2097,7 +2078,6 @@ can_schedule_ready_p (insn) /* An interblock motion? 
*/ if (INSN_BB (insn) != target_bb) { - rtx temp; basic_block b1; if (IS_SPECULATIVE_INSN (insn)) @@ -2114,18 +2094,9 @@ can_schedule_ready_p (insn) } nr_inter++; - /* Find the beginning of the scheduling group. */ - /* ??? Ought to update basic block here, but later bits of - schedule_block assumes the original insn block is - still intact. */ - - temp = insn; - while (SCHED_GROUP_P (temp)) - temp = PREV_INSN (temp); - /* Update source block boundaries. */ - b1 = BLOCK_FOR_INSN (temp); - if (temp == b1->head && insn == b1->end) + b1 = BLOCK_FOR_INSN (insn); + if (insn == b1->head && insn == b1->end) { /* We moved all the insns in the basic block. Emit a note after the last insn and update the @@ -2139,9 +2110,9 @@ can_schedule_ready_p (insn) /* We took insns from the end of the basic block, so update the end of block boundary so that it points to the first insn we did not move. */ - b1->end = PREV_INSN (temp); + b1->end = PREV_INSN (insn); } - else if (temp == b1->head) + else if (insn == b1->head) { /* We took insns from the start of the basic block, so update the start of block boundary so that @@ -2361,17 +2332,6 @@ add_branch_dependences (head, tail) CANT_MOVE (insn) = 1; last = insn; - /* Skip over insns that are part of a group. - Make each insn explicitly depend on the previous insn. - This ensures that only the group header will ever enter - the ready queue (and, when scheduled, will automatically - schedule the SCHED_GROUP_P block). */ - while (SCHED_GROUP_P (insn)) - { - rtx temp = prev_nonnote_insn (insn); - add_dependence (insn, temp, REG_DEP_ANTI); - insn = temp; - } } /* Don't overrun the bounds of the basic block. */ @@ -2393,10 +2353,6 @@ add_branch_dependences (head, tail) add_dependence (last, insn, REG_DEP_ANTI); INSN_REF_COUNT (insn) = 1; - - /* Skip over insns that are part of a group. 
*/ - while (SCHED_GROUP_P (insn)) - insn = prev_nonnote_insn (insn); } } @@ -2728,6 +2684,10 @@ schedule_region (rgn) get_block_head_tail (BB_TO_BLOCK (bb), &head, &tail); compute_forward_dependences (head, tail); + + if (targetm.sched.dependencies_evaluation_hook) + targetm.sched.dependencies_evaluation_hook (head, tail); + } /* Set priorities. */ diff --git a/gcc/target-def.h b/gcc/target-def.h index 5842863245e..0cf463b9ce5 100644 --- a/gcc/target-def.h +++ b/gcc/target-def.h @@ -194,12 +194,15 @@ Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. #define TARGET_SCHED_FINISH 0 #define TARGET_SCHED_REORDER 0 #define TARGET_SCHED_REORDER2 0 +#define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK 0 #define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE 0 #define TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN 0 #define TARGET_SCHED_DFA_PRE_CYCLE_INSN 0 #define TARGET_SCHED_INIT_DFA_POST_CYCLE_INSN 0 #define TARGET_SCHED_DFA_POST_CYCLE_INSN 0 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD 0 +#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD 0 +#define TARGET_SCHED_DFA_NEW_CYCLE 0 #define TARGET_SCHED_INIT_DFA_BUBBLES 0 #define TARGET_SCHED_DFA_BUBBLE 0 @@ -212,12 +215,15 @@ Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
TARGET_SCHED_FINISH, \ TARGET_SCHED_REORDER, \ TARGET_SCHED_REORDER2, \ + TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK, \ TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE, \ TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN, \ TARGET_SCHED_DFA_PRE_CYCLE_INSN, \ TARGET_SCHED_INIT_DFA_POST_CYCLE_INSN, \ TARGET_SCHED_DFA_POST_CYCLE_INSN, \ TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD, \ + TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD, \ + TARGET_SCHED_DFA_NEW_CYCLE, \ TARGET_SCHED_INIT_DFA_BUBBLES, \ TARGET_SCHED_DFA_BUBBLE} diff --git a/gcc/target.h b/gcc/target.h index 98e32d5791a..770830079c1 100644 --- a/gcc/target.h +++ b/gcc/target.h @@ -177,6 +177,11 @@ struct gcc_target int (* reorder) PARAMS ((FILE *, int, rtx *, int *, int)); int (* reorder2) PARAMS ((FILE *, int, rtx *, int *, int)); + /* The following member value is a pointer to a function called + after evaluation forward dependencies of insns in chain given + by two parameter values (head and tail correspondingly). */ + void (* dependencies_evaluation_hook) PARAMS ((rtx, rtx)); + /* The following member value is a pointer to a function returning nonzero if we should use DFA based scheduling. The default is to use the old pipeline scheduler. */ @@ -206,6 +211,25 @@ struct gcc_target try to choose ready insn which permits to start maximum number of insns on the same cycle. */ int (* first_cycle_multipass_dfa_lookahead) PARAMS ((void)); + /* The following member value is pointer to a function controlling + what insns from the ready insn queue will be considered for the + multipass insn scheduling. If the hook returns zero for insn + passed as the parameter, the insn will be not chosen to be + issued. */ + int (* first_cycle_multipass_dfa_lookahead_guard) PARAMS ((rtx)); + /* The following member value is pointer to a function called by + the insn scheduler before issuing insn passed as the third + parameter on given cycle. If the hook returns nonzero, the + insn is not issued on given processors cycle. 
Instead of that, + the processor cycle is advanced. If the value passed through + the last parameter is zero, the insn ready queue is not sorted + on the new cycle start as usual. The first parameter passes + file for debugging output. The second one passes the scheduler + verbose level of the debugging output. The fourth and the fifth + parameter values are, respectively, the processor cycle on which + the previous insn has been issued and the current processor + cycle. */ + int (* dfa_new_cycle) PARAMS ((FILE *, int, rtx, int, int, int *)); /* The values of the following members are pointers to functions used to improve the first cycle multipass scheduling by inserting nop insns. dfa_scheduler_bubble gives a function |