diff options
author | vmakarov <vmakarov@138bc75d-0d04-0410-961f-82ee72b054a4> | 2003-01-09 23:15:34 +0000 |
---|---|---|
committer | vmakarov <vmakarov@138bc75d-0d04-0410-961f-82ee72b054a4> | 2003-01-09 23:15:34 +0000 |
commit | 58ada791d3cb97df7eae8ab3db29f9a5d4149e79 (patch) | |
tree | 4c2dc43818bfc1ad93057e3973541f95b57fd3cc /gcc/config/ia64 | |
parent | e7bf79cf831a76f2e0d6c514f704aebcb6c389e8 (diff) | |
download | gcc-58ada791d3cb97df7eae8ab3db29f9a5d4149e79.tar.gz |
2003-01-09 Vladimir Makarov <vmakarov@redhat.com>
Merging changes from itanium-sched-branch:
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@61132 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc/config/ia64')
-rw-r--r-- | gcc/config/ia64/ia64-protos.h | 4 | ||||
-rw-r--r-- | gcc/config/ia64/ia64.c | 2665 | ||||
-rw-r--r-- | gcc/config/ia64/ia64.h | 30 | ||||
-rw-r--r-- | gcc/config/ia64/ia64.md | 103 | ||||
-rw-r--r-- | gcc/config/ia64/itanium1.md | 1616 | ||||
-rw-r--r-- | gcc/config/ia64/itanium2.md | 1762 |
6 files changed, 4821 insertions, 1359 deletions
diff --git a/gcc/config/ia64/ia64-protos.h b/gcc/config/ia64/ia64-protos.h index 0aba9390aae..63b8572067e 100644 --- a/gcc/config/ia64/ia64-protos.h +++ b/gcc/config/ia64/ia64-protos.h @@ -27,7 +27,11 @@ extern GTY(()) rtx ia64_compare_op1; /* Functions defined in ia64.c */ +extern int bundling_p; #ifdef RTX_CODE +extern int ia64_st_address_bypass_p PARAMS((rtx, rtx)); +extern int ia64_ld_address_bypass_p PARAMS((rtx, rtx)); +extern int ia64_produce_address_p PARAMS((rtx)); extern int call_operand PARAMS((rtx, enum machine_mode)); extern int sdata_symbolic_operand PARAMS((rtx, enum machine_mode)); extern int got_symbolic_operand PARAMS((rtx, enum machine_mode)); diff --git a/gcc/config/ia64/ia64.c b/gcc/config/ia64/ia64.c index 3009d1c1200..039d11aaac7 100644 --- a/gcc/config/ia64/ia64.c +++ b/gcc/config/ia64/ia64.c @@ -47,6 +47,7 @@ Boston, MA 02111-1307, USA. */ #include "target.h" #include "target-def.h" #include "tm_p.h" +#include "hashtab.h" /* This is used for communication between ASM_OUTPUT_LABEL and ASM_OUTPUT_LABELREF. */ @@ -103,6 +104,12 @@ int ia64_tls_size = 22; /* String used with the -mtls-size= option. */ const char *ia64_tls_size_string; +/* Which cpu are we scheduling for. */ +enum processor_type ia64_tune; + +/* String used with the -tune= option. */ +const char *ia64_tune_string; + /* Determines whether we run our final scheduling pass or not. We always avoid the normal second scheduling pass. */ static int ia64_flag_schedule_insns2; @@ -111,7 +118,19 @@ static int ia64_flag_schedule_insns2; sections. */ unsigned int ia64_section_threshold; + +/* The following variable is used by the DFA insn scheduler. The value is + TRUE if we do insn bundling instead of insn scheduling. 
*/ +int bundling_p = 0; + +static int ia64_use_dfa_pipeline_interface PARAMS ((void)); +static int ia64_first_cycle_multipass_dfa_lookahead PARAMS ((void)); +static void ia64_dependencies_evaluation_hook PARAMS ((rtx, rtx)); +static void ia64_init_dfa_pre_cycle_insn PARAMS ((void)); +static rtx ia64_dfa_pre_cycle_insn PARAMS ((void)); +static int ia64_first_cycle_multipass_dfa_lookahead_guard PARAMS ((rtx)); +static int ia64_dfa_new_cycle PARAMS ((FILE *, int, rtx, int, int, int *)); static rtx gen_tls_get_addr PARAMS ((void)); static rtx gen_thread_pointer PARAMS ((void)); static int find_gr_spill PARAMS ((int)); @@ -132,6 +151,7 @@ static void fix_range PARAMS ((const char *)); static struct machine_function * ia64_init_machine_status PARAMS ((void)); static void emit_insn_group_barriers PARAMS ((FILE *, rtx)); static void emit_all_insn_group_barriers PARAMS ((FILE *, rtx)); +static void final_emit_insn_group_barriers PARAMS ((FILE *)); static void emit_predicate_relation_info PARAMS ((void)); static bool ia64_in_small_data_p PARAMS ((tree)); static void ia64_encode_section_info PARAMS ((tree, int)); @@ -157,12 +177,31 @@ static int ia64_issue_rate PARAMS ((void)); static int ia64_adjust_cost PARAMS ((rtx, rtx, rtx, int)); static void ia64_sched_init PARAMS ((FILE *, int, int)); static void ia64_sched_finish PARAMS ((FILE *, int)); -static int ia64_internal_sched_reorder PARAMS ((FILE *, int, rtx *, - int *, int, int)); +static int ia64_dfa_sched_reorder PARAMS ((FILE *, int, rtx *, int *, + int, int)); static int ia64_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int)); static int ia64_sched_reorder2 PARAMS ((FILE *, int, rtx *, int *, int)); static int ia64_variable_issue PARAMS ((FILE *, int, rtx, int)); +static struct bundle_state *get_free_bundle_state PARAMS ((void)); +static void free_bundle_state PARAMS ((struct bundle_state *)); +static void initiate_bundle_states PARAMS ((void)); +static void finish_bundle_states PARAMS ((void)); +static unsigned 
bundle_state_hash PARAMS ((const void *)); +static int bundle_state_eq_p PARAMS ((const void *, const void *)); +static int insert_bundle_state PARAMS ((struct bundle_state *)); +static void initiate_bundle_state_table PARAMS ((void)); +static void finish_bundle_state_table PARAMS ((void)); +static int try_issue_nops PARAMS ((struct bundle_state *, int)); +static int try_issue_insn PARAMS ((struct bundle_state *, rtx)); +static void issue_nops_and_insn PARAMS ((struct bundle_state *, int, + rtx, int)); +static int get_max_pos PARAMS ((state_t)); +static int get_template PARAMS ((state_t, int)); + +static rtx get_next_important_insn PARAMS ((rtx, rtx)); +static void bundling PARAMS ((FILE *, int, rtx, rtx)); + static void ia64_output_mi_thunk PARAMS ((FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, tree)); @@ -244,6 +283,27 @@ static const struct attribute_spec ia64_attribute_table[] = #undef TARGET_SCHED_REORDER2 #define TARGET_SCHED_REORDER2 ia64_sched_reorder2 +#undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK +#define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK ia64_dependencies_evaluation_hook + +#undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE +#define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE ia64_use_dfa_pipeline_interface + +#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD +#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD ia64_first_cycle_multipass_dfa_lookahead + +#undef TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN +#define TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN ia64_init_dfa_pre_cycle_insn +#undef TARGET_SCHED_DFA_PRE_CYCLE_INSN +#define TARGET_SCHED_DFA_PRE_CYCLE_INSN ia64_dfa_pre_cycle_insn + +#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD +#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD\ + ia64_first_cycle_multipass_dfa_lookahead_guard + +#undef TARGET_SCHED_DFA_NEW_CYCLE +#define TARGET_SCHED_DFA_NEW_CYCLE ia64_dfa_new_cycle + #ifdef HAVE_AS_TLS #undef TARGET_HAVE_TLS #define TARGET_HAVE_TLS true @@ -4209,6 +4269,23 @@ 
ia64_init_machine_status () void ia64_override_options () { + static struct pta + { + const char *const name; /* processor name or nickname. */ + const enum processor_type processor; + } + const processor_alias_table[] = + { + {"itanium", PROCESSOR_ITANIUM}, + {"itanium1", PROCESSOR_ITANIUM}, + {"merced", PROCESSOR_ITANIUM}, + {"itanium2", PROCESSOR_ITANIUM2}, + {"mckinley", PROCESSOR_ITANIUM2}, + }; + + int const pta_size = ARRAY_SIZE (processor_alias_table); + int i; + if (TARGET_AUTO_PIC) target_flags |= MASK_CONST_GP; @@ -4237,6 +4314,19 @@ ia64_override_options () ia64_tls_size = tmp; } + if (!ia64_tune_string) + ia64_tune_string = "itanium2"; + + for (i = 0; i < pta_size; i++) + if (! strcmp (ia64_tune_string, processor_alias_table[i].name)) + { + ia64_tune = processor_alias_table[i].processor; + break; + } + + if (i == pta_size) + error ("bad value (%s) for -tune= switch", ia64_tune_string); + ia64_flag_schedule_insns2 = flag_schedule_insns_after_reload; flag_schedule_insns_after_reload = 0; @@ -4249,20 +4339,9 @@ ia64_override_options () real_format_for_mode[TFmode - QFmode] = &ieee_extended_intel_128_format; } -static enum attr_itanium_requires_unit0 ia64_safe_itanium_requires_unit0 PARAMS((rtx)); static enum attr_itanium_class ia64_safe_itanium_class PARAMS((rtx)); static enum attr_type ia64_safe_type PARAMS((rtx)); -static enum attr_itanium_requires_unit0 -ia64_safe_itanium_requires_unit0 (insn) - rtx insn; -{ - if (recog_memoized (insn) >= 0) - return get_attr_itanium_requires_unit0 (insn); - else - return ITANIUM_REQUIRES_UNIT0_NO; -} - static enum attr_itanium_class ia64_safe_itanium_class (insn) rtx insn; @@ -5096,7 +5175,10 @@ group_barrier_needed_p (insn) abort (); } - if (first_instruction) + if (first_instruction && INSN_P (insn) + && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE + && GET_CODE (PATTERN (insn)) != USE + && GET_CODE (PATTERN (insn)) != CLOBBER) { need_barrier = 0; first_instruction = 0; @@ -5229,6 +5311,7 @@ 
emit_all_insn_group_barriers (dump, insns) } } } + static int errata_find_address_regs PARAMS ((rtx *, void *)); static void errata_emit_nops PARAMS ((rtx)); @@ -5374,92 +5457,92 @@ fixup_errata () } } -/* Instruction scheduling support. */ -/* Describe one bundle. */ -struct bundle -{ - /* Zero if there's no possibility of a stop in this bundle other than - at the end, otherwise the position of the optional stop bit. */ - int possible_stop; - /* The types of the three slots. */ - enum attr_type t[3]; - /* The pseudo op to be emitted into the assembler output. */ - const char *name; -}; +/* Instruction scheduling support. */ #define NR_BUNDLES 10 -/* A list of all available bundles. */ +/* A list of names of all available bundles. */ -static const struct bundle bundle[NR_BUNDLES] = +static const char *bundle_name [NR_BUNDLES] = { - { 2, { TYPE_M, TYPE_I, TYPE_I }, ".mii" }, - { 1, { TYPE_M, TYPE_M, TYPE_I }, ".mmi" }, - { 0, { TYPE_M, TYPE_F, TYPE_I }, ".mfi" }, - { 0, { TYPE_M, TYPE_M, TYPE_F }, ".mmf" }, + ".mii", + ".mmi", + ".mfi", + ".mmf", #if NR_BUNDLES == 10 - { 0, { TYPE_B, TYPE_B, TYPE_B }, ".bbb" }, - { 0, { TYPE_M, TYPE_B, TYPE_B }, ".mbb" }, + ".bbb", + ".mbb", #endif - { 0, { TYPE_M, TYPE_I, TYPE_B }, ".mib" }, - { 0, { TYPE_M, TYPE_M, TYPE_B }, ".mmb" }, - { 0, { TYPE_M, TYPE_F, TYPE_B }, ".mfb" }, - /* .mfi needs to occur earlier than .mlx, so that we only generate it if - it matches an L type insn. Otherwise we'll try to generate L type - nops. */ - { 0, { TYPE_M, TYPE_L, TYPE_X }, ".mlx" } + ".mib", + ".mmb", + ".mfb", + ".mlx" }; -/* Describe a packet of instructions. Packets consist of two bundles that - are visible to the hardware in one scheduling window. */ +/* Nonzero if we should insert stop bits into the schedule. */ -struct ia64_packet -{ - const struct bundle *t1, *t2; - /* Precomputed value of the first split issue in this packet if a cycle - starts at its beginning. 
*/ - int first_split; - /* For convenience, the insn types are replicated here so we don't have - to go through T1 and T2 all the time. */ - enum attr_type t[6]; -}; +int ia64_final_schedule = 0; -/* An array containing all possible packets. */ -#define NR_PACKETS (NR_BUNDLES * NR_BUNDLES) -static struct ia64_packet packets[NR_PACKETS]; +/* Codes of the corrsponding quieryied units: */ -/* Map attr_type to a string with the name. */ +static int _0mii_, _0mmi_, _0mfi_, _0mmf_; +static int _0bbb_, _0mbb_, _0mib_, _0mmb_, _0mfb_, _0mlx_; -static const char *const type_names[] = -{ - "UNKNOWN", "A", "I", "M", "F", "B", "L", "X", "S" -}; +static int _1mii_, _1mmi_, _1mfi_, _1mmf_; +static int _1bbb_, _1mbb_, _1mib_, _1mmb_, _1mfb_, _1mlx_; -/* Nonzero if we should insert stop bits into the schedule. */ -int ia64_final_schedule = 0; +static int pos_1, pos_2, pos_3, pos_4, pos_5, pos_6; + +/* The following variable value is an insn group barrier. */ + +static rtx dfa_stop_insn; + +/* The following variable value is the last issued insn. */ + +static rtx last_scheduled_insn; + +/* The following variable value is size of the DFA state. */ + +static size_t dfa_state_size; + +/* The following variable value is pointer to a DFA state used as + temporary variable. */ + +static state_t temp_dfa_state = NULL; + +/* The following variable value is DFA state after issuing the last + insn. */ + +static state_t prev_cycle_state = NULL; + +/* The following array element values are TRUE if the corresponding + insn reuqires to add stop bits before it. */ + +static char *stops_p; + +/* The following variable is used to set up the mentioned above array. */ + +static int stop_before_p = 0; + +/* The following variable value is length of the arrays `clocks' and + `add_cycles'. */ + +static int clocks_length; + +/* The following array element values are cycles on which the + corresponding insn will be issued. The array is used only for + Itanium1. 
*/ + +static int *clocks; + +/* The following array element values are numbers of cycles should be + added to improve insn scheduling for MM_insns for Itanium1. */ + +static int *add_cycles; -static int itanium_split_issue PARAMS ((const struct ia64_packet *, int)); static rtx ia64_single_set PARAMS ((rtx)); -static int insn_matches_slot PARAMS ((const struct ia64_packet *, enum attr_type, int, rtx)); static void ia64_emit_insn_before PARAMS ((rtx, rtx)); -static void maybe_rotate PARAMS ((FILE *)); -static void finish_last_head PARAMS ((FILE *, int)); -static void rotate_one_bundle PARAMS ((FILE *)); -static void rotate_two_bundles PARAMS ((FILE *)); -static void nop_cycles_until PARAMS ((int, FILE *)); -static void cycle_end_fill_slots PARAMS ((FILE *)); -static int packet_matches_p PARAMS ((const struct ia64_packet *, int, int *)); -static int get_split PARAMS ((const struct ia64_packet *, int)); -static int find_best_insn PARAMS ((rtx *, enum attr_type *, int, - const struct ia64_packet *, int)); -static void find_best_packet PARAMS ((int *, const struct ia64_packet **, - rtx *, enum attr_type *, int)); -static int itanium_reorder PARAMS ((FILE *, rtx *, rtx *, int)); -static void dump_current_packet PARAMS ((FILE *)); -static void schedule_stop PARAMS ((FILE *)); -static rtx gen_nop_type PARAMS ((enum attr_type)); -static void ia64_emit_nops PARAMS ((void)); /* Map a bundle number to its pseudo-op. */ @@ -5467,55 +5550,9 @@ const char * get_bundle_name (b) int b; { - return bundle[b].name; + return bundle_name[b]; } -/* Compute the slot which will cause a split issue in packet P if the - current cycle begins at slot BEGIN. */ - -static int -itanium_split_issue (p, begin) - const struct ia64_packet *p; - int begin; -{ - int type_count[TYPE_S]; - int i; - int split = 6; - - if (begin < 3) - { - /* Always split before and after MMF. 
*/ - if (p->t[0] == TYPE_M && p->t[1] == TYPE_M && p->t[2] == TYPE_F) - return 3; - if (p->t[3] == TYPE_M && p->t[4] == TYPE_M && p->t[5] == TYPE_F) - return 3; - /* Always split after MBB and BBB. */ - if (p->t[1] == TYPE_B) - return 3; - /* Split after first bundle in MIB BBB combination. */ - if (p->t[2] == TYPE_B && p->t[3] == TYPE_B) - return 3; - } - - memset (type_count, 0, sizeof type_count); - for (i = begin; i < split; i++) - { - enum attr_type t0 = p->t[i]; - /* An MLX bundle reserves the same units as an MFI bundle. */ - enum attr_type t = (t0 == TYPE_L ? TYPE_F - : t0 == TYPE_X ? TYPE_I - : t0); - - /* Itanium can execute up to 3 branches, 2 floating point, 2 memory, and - 2 integer per cycle. */ - int max = (t == TYPE_B ? 3 : 2); - if (type_count[t] == max) - return i; - - type_count[t]++; - } - return split; -} /* Return the maximum number of instructions a cpu can issue. */ @@ -5563,208 +5600,21 @@ ia64_adjust_cost (insn, link, dep_insn, cost) rtx insn, link, dep_insn; int cost; { - enum attr_type dep_type; enum attr_itanium_class dep_class; enum attr_itanium_class insn_class; - rtx dep_set, set, src, addr; - - if (GET_CODE (PATTERN (insn)) == CLOBBER - || GET_CODE (PATTERN (insn)) == USE - || GET_CODE (PATTERN (dep_insn)) == CLOBBER - || GET_CODE (PATTERN (dep_insn)) == USE - /* @@@ Not accurate for indirect calls. */ - || GET_CODE (insn) == CALL_INSN - || ia64_safe_type (insn) == TYPE_S) - return 0; - if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT - || REG_NOTE_KIND (link) == REG_DEP_ANTI) - return 0; + if (REG_NOTE_KIND (link) != REG_DEP_OUTPUT) + return cost; - dep_type = ia64_safe_type (dep_insn); - dep_class = ia64_safe_itanium_class (dep_insn); insn_class = ia64_safe_itanium_class (insn); - - /* Compares that feed a conditional branch can execute in the same - cycle. 
*/ - dep_set = ia64_single_set (dep_insn); - set = ia64_single_set (insn); - - if (dep_type != TYPE_F - && dep_set - && GET_CODE (SET_DEST (dep_set)) == REG - && PR_REG (REGNO (SET_DEST (dep_set))) - && GET_CODE (insn) == JUMP_INSN) + dep_class = ia64_safe_itanium_class (dep_insn); + if (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF + || insn_class == ITANIUM_CLASS_ST || insn_class == ITANIUM_CLASS_STF) return 0; - if (dep_set && GET_CODE (SET_DEST (dep_set)) == MEM) - { - /* ??? Can't find any information in the documenation about whether - a sequence - st [rx] = ra - ld rb = [ry] - splits issue. Assume it doesn't. */ - return 0; - } - - src = set ? SET_SRC (set) : 0; - addr = 0; - if (set) - { - if (GET_CODE (SET_DEST (set)) == MEM) - addr = XEXP (SET_DEST (set), 0); - else if (GET_CODE (SET_DEST (set)) == SUBREG - && GET_CODE (SUBREG_REG (SET_DEST (set))) == MEM) - addr = XEXP (SUBREG_REG (SET_DEST (set)), 0); - else - { - addr = src; - if (GET_CODE (addr) == UNSPEC && XVECLEN (addr, 0) > 0) - addr = XVECEXP (addr, 0, 0); - while (GET_CODE (addr) == SUBREG || GET_CODE (addr) == ZERO_EXTEND) - addr = XEXP (addr, 0); - if (GET_CODE (addr) == MEM) - addr = XEXP (addr, 0); - else - addr = 0; - } - } - - if (addr && GET_CODE (addr) == POST_MODIFY) - addr = XEXP (addr, 0); - - set = ia64_single_set (dep_insn); - - if ((dep_class == ITANIUM_CLASS_IALU - || dep_class == ITANIUM_CLASS_ILOG - || dep_class == ITANIUM_CLASS_LD) - && (insn_class == ITANIUM_CLASS_LD - || insn_class == ITANIUM_CLASS_ST)) - { - if (! addr || ! set) - abort (); - /* This isn't completely correct - an IALU that feeds an address has - a latency of 1 cycle if it's issued in an M slot, but 2 cycles - otherwise. Unfortunately there's no good way to describe this. 
*/ - if (reg_overlap_mentioned_p (SET_DEST (set), addr)) - return cost + 1; - } - - if ((dep_class == ITANIUM_CLASS_IALU - || dep_class == ITANIUM_CLASS_ILOG - || dep_class == ITANIUM_CLASS_LD) - && (insn_class == ITANIUM_CLASS_MMMUL - || insn_class == ITANIUM_CLASS_MMSHF - || insn_class == ITANIUM_CLASS_MMSHFI)) - return 3; - - if (dep_class == ITANIUM_CLASS_FMAC - && (insn_class == ITANIUM_CLASS_FMISC - || insn_class == ITANIUM_CLASS_FCVTFX - || insn_class == ITANIUM_CLASS_XMPY)) - return 7; - - if ((dep_class == ITANIUM_CLASS_FMAC - || dep_class == ITANIUM_CLASS_FMISC - || dep_class == ITANIUM_CLASS_FCVTFX - || dep_class == ITANIUM_CLASS_XMPY) - && insn_class == ITANIUM_CLASS_STF) - return 8; - - /* Intel docs say only LD, ST, IALU, ILOG, ISHF consumers have latency 4, - but HP engineers say any non-MM operation. */ - if ((dep_class == ITANIUM_CLASS_MMMUL - || dep_class == ITANIUM_CLASS_MMSHF - || dep_class == ITANIUM_CLASS_MMSHFI) - && insn_class != ITANIUM_CLASS_MMMUL - && insn_class != ITANIUM_CLASS_MMSHF - && insn_class != ITANIUM_CLASS_MMSHFI) - return 4; - return cost; } -/* Describe the current state of the Itanium pipeline. */ -static struct -{ - /* The first slot that is used in the current cycle. */ - int first_slot; - /* The next slot to fill. */ - int cur; - /* The packet we have selected for the current issue window. */ - const struct ia64_packet *packet; - /* The position of the split issue that occurs due to issue width - limitations (6 if there's no split issue). */ - int split; - /* Record data about the insns scheduled so far in the same issue - window. The elements up to but not including FIRST_SLOT belong - to the previous cycle, the ones starting with FIRST_SLOT belong - to the current cycle. */ - enum attr_type types[6]; - rtx insns[6]; - int stopbit[6]; - /* Nonzero if we decided to schedule a stop bit. 
*/ - int last_was_stop; -} sched_data; - -/* Temporary arrays; they have enough elements to hold all insns that - can be ready at the same time while scheduling of the current block. - SCHED_READY can hold ready insns, SCHED_TYPES their types. */ -static rtx *sched_ready; -static enum attr_type *sched_types; - -/* Determine whether an insn INSN of type ITYPE can fit into slot SLOT - of packet P. */ - -static int -insn_matches_slot (p, itype, slot, insn) - const struct ia64_packet *p; - enum attr_type itype; - int slot; - rtx insn; -{ - enum attr_itanium_requires_unit0 u0; - enum attr_type stype = p->t[slot]; - - if (insn) - { - u0 = ia64_safe_itanium_requires_unit0 (insn); - if (u0 == ITANIUM_REQUIRES_UNIT0_YES) - { - int i; - for (i = sched_data.first_slot; i < slot; i++) - if (p->t[i] == stype - || (stype == TYPE_F && p->t[i] == TYPE_L) - || (stype == TYPE_I && p->t[i] == TYPE_X)) - return 0; - } - if (GET_CODE (insn) == CALL_INSN) - { - /* Reject calls in multiway branch packets. We want to limit - the number of multiway branches we generate (since the branch - predictor is limited), and this seems to work fairly well. - (If we didn't do this, we'd have to add another test here to - force calls into the third slot of the bundle.) */ - if (slot < 3) - { - if (p->t[1] == TYPE_B) - return 0; - } - else - { - if (p->t[4] == TYPE_B) - return 0; - } - } - } - - if (itype == stype) - return 1; - if (itype == TYPE_A) - return stype == TYPE_M || stype == TYPE_I; - return 0; -} - /* Like emit_insn_before, but skip cycle_display notes. ??? When cycle display notes are implemented, update this. */ @@ -5775,1055 +5625,1324 @@ ia64_emit_insn_before (insn, before) emit_insn_before (insn, before); } -/* When rotating a bundle out of the issue window, insert a bundle selector - insn in front of it. DUMP is the scheduling dump file or NULL. 
START - is either 0 or 3, depending on whether we want to emit a bundle selector - for the first bundle or the second bundle in the current issue window. - - The selector insns are emitted this late because the selected packet can - be changed until parts of it get rotated out. */ +/* The following function marks insns who produce addresses for load + and store insns. Such insns will be placed into M slots because it + decrease latency time for Itanium1 (see function + `ia64_produce_address_p' and the DFA descriptions). */ static void -finish_last_head (dump, start) - FILE *dump; - int start; +ia64_dependencies_evaluation_hook (head, tail) + rtx head, tail; { - const struct ia64_packet *p = sched_data.packet; - const struct bundle *b = start == 0 ? p->t1 : p->t2; - int bundle_type = b - bundle; - rtx insn; - int i; - - if (! ia64_final_schedule) - return; - - for (i = start; sched_data.insns[i] == 0; i++) - if (i == start + 3) - abort (); - insn = sched_data.insns[i]; + rtx insn, link, next, next_tail; + + next_tail = NEXT_INSN (tail); + for (insn = head; insn != next_tail; insn = NEXT_INSN (insn)) + if (INSN_P (insn)) + insn->call = 0; + for (insn = head; insn != next_tail; insn = NEXT_INSN (insn)) + if (INSN_P (insn) + && ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IALU) + { + for (link = INSN_DEPEND (insn); link != 0; link = XEXP (link, 1)) + { + next = XEXP (link, 0); + if ((ia64_safe_itanium_class (next) == ITANIUM_CLASS_ST + || ia64_safe_itanium_class (next) == ITANIUM_CLASS_STF) + && ia64_st_address_bypass_p (insn, next)) + break; + else if ((ia64_safe_itanium_class (next) == ITANIUM_CLASS_LD + || ia64_safe_itanium_class (next) + == ITANIUM_CLASS_FLD) + && ia64_ld_address_bypass_p (insn, next)) + break; + } + insn->call = link != 0; + } +} - if (dump) - fprintf (dump, "// Emitting template before %d: %s\n", - INSN_UID (insn), b->name); +/* We're beginning a new block. Initialize data structures as necessary. 
*/ - ia64_emit_insn_before (gen_bundle_selector (GEN_INT (bundle_type)), insn); +static void +ia64_sched_init (dump, sched_verbose, max_ready) + FILE *dump ATTRIBUTE_UNUSED; + int sched_verbose ATTRIBUTE_UNUSED; + int max_ready ATTRIBUTE_UNUSED; +{ +#ifdef ENABLE_CHECKING + rtx insn; + + if (reload_completed) + for (insn = NEXT_INSN (current_sched_info->prev_head); + insn != current_sched_info->next_tail; + insn = NEXT_INSN (insn)) + if (SCHED_GROUP_P (insn)) + abort (); +#endif + last_scheduled_insn = NULL_RTX; + init_insn_group_barriers (); } -/* We can't schedule more insns this cycle. Fix up the scheduling state - and advance FIRST_SLOT and CUR. - We have to distribute the insns that are currently found between - FIRST_SLOT and CUR into the slots of the packet we have selected. So - far, they are stored successively in the fields starting at FIRST_SLOT; - now they must be moved to the correct slots. - DUMP is the current scheduling dump file, or NULL. */ +/* We are about to being issuing insns for this clock cycle. + Override the default sort algorithm to better slot instructions. */ -static void -cycle_end_fill_slots (dump) +static int +ia64_dfa_sched_reorder (dump, sched_verbose, ready, pn_ready, + clock_var, reorder_type) FILE *dump; + int sched_verbose; + rtx *ready; + int *pn_ready; + int clock_var ATTRIBUTE_UNUSED; + int reorder_type; { - const struct ia64_packet *packet = sched_data.packet; - int slot, i; - enum attr_type tmp_types[6]; - rtx tmp_insns[6]; + int n_asms; + int n_ready = *pn_ready; + rtx *e_ready = ready + n_ready; + rtx *insnp; - memcpy (tmp_types, sched_data.types, 6 * sizeof (enum attr_type)); - memcpy (tmp_insns, sched_data.insns, 6 * sizeof (rtx)); + if (sched_verbose) + fprintf (dump, "// ia64_dfa_sched_reorder (type %d):\n", reorder_type); - for (i = slot = sched_data.first_slot; i < sched_data.cur; i++) + if (reorder_type == 0) { - enum attr_type t = tmp_types[i]; - if (t != ia64_safe_type (tmp_insns[i])) - abort (); - while (! 
insn_matches_slot (packet, t, slot, tmp_insns[i])) - { - if (slot > sched_data.split) - abort (); - if (dump) - fprintf (dump, "// Packet needs %s, have %s\n", - type_names[packet->t[slot]], type_names[t]); - sched_data.types[slot] = packet->t[slot]; - sched_data.insns[slot] = 0; - sched_data.stopbit[slot] = 0; - - /* ??? TYPE_L instructions always fill up two slots, but we don't - support TYPE_L nops. */ - if (packet->t[slot] == TYPE_L) - abort (); - - slot++; - } - - /* Do _not_ use T here. If T == TYPE_A, then we'd risk changing the - actual slot type later. */ - sched_data.types[slot] = packet->t[slot]; - sched_data.insns[slot] = tmp_insns[i]; - sched_data.stopbit[slot] = 0; - slot++; + /* First, move all USEs, CLOBBERs and other crud out of the way. */ + n_asms = 0; + for (insnp = ready; insnp < e_ready; insnp++) + if (insnp < e_ready) + { + rtx insn = *insnp; + enum attr_type t = ia64_safe_type (insn); + if (t == TYPE_UNKNOWN) + { + if (GET_CODE (PATTERN (insn)) == ASM_INPUT + || asm_noperands (PATTERN (insn)) >= 0) + { + rtx lowest = ready[n_asms]; + ready[n_asms] = insn; + *insnp = lowest; + n_asms++; + } + else + { + rtx highest = ready[n_ready - 1]; + ready[n_ready - 1] = insn; + *insnp = highest; + return 1; + } + } + } - /* TYPE_L instructions always fill up two slots. */ - if (t == TYPE_L) + if (n_asms < n_ready) { - sched_data.types[slot] = packet->t[slot]; - sched_data.insns[slot] = 0; - sched_data.stopbit[slot] = 0; - slot++; + /* Some normal insns to process. Skip the asms. */ + ready += n_asms; + n_ready -= n_asms; } + else if (n_ready > 0) + return 1; } - /* This isn't right - there's no need to pad out until the forced split; - the CPU will automatically split if an insn isn't ready. 
*/ -#if 0 - while (slot < sched_data.split) + if (ia64_final_schedule) { - sched_data.types[slot] = packet->t[slot]; - sched_data.insns[slot] = 0; - sched_data.stopbit[slot] = 0; - slot++; + int deleted = 0; + int nr_need_stop = 0; + + for (insnp = ready; insnp < e_ready; insnp++) + if (safe_group_barrier_needed_p (*insnp)) + nr_need_stop++; + + if (reorder_type == 1 && n_ready == nr_need_stop) + return 0; + if (reorder_type == 0) + return 1; + insnp = e_ready; + /* Move down everything that needs a stop bit, preserving + relative order. */ + while (insnp-- > ready + deleted) + while (insnp >= ready + deleted) + { + rtx insn = *insnp; + if (! safe_group_barrier_needed_p (insn)) + break; + memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx)); + *ready = insn; + deleted++; + } + n_ready -= deleted; + ready += deleted; } -#endif - sched_data.first_slot = sched_data.cur = slot; + return 1; } -/* Bundle rotations, as described in the Itanium optimization manual. - We can rotate either one or both bundles out of the issue window. - DUMP is the current scheduling dump file, or NULL. */ - -static void -rotate_one_bundle (dump) - FILE *dump; -{ - if (dump) - fprintf (dump, "// Rotating one bundle.\n"); - - finish_last_head (dump, 0); - if (sched_data.cur > 3) - { - sched_data.cur -= 3; - sched_data.first_slot -= 3; - memmove (sched_data.types, - sched_data.types + 3, - sched_data.cur * sizeof *sched_data.types); - memmove (sched_data.stopbit, - sched_data.stopbit + 3, - sched_data.cur * sizeof *sched_data.stopbit); - memmove (sched_data.insns, - sched_data.insns + 3, - sched_data.cur * sizeof *sched_data.insns); - sched_data.packet - = &packets[(sched_data.packet->t2 - bundle) * NR_BUNDLES]; - } - else - { - sched_data.cur = 0; - sched_data.first_slot = 0; - } -} +/* We are about to being issuing insns for this clock cycle. Override + the default sort algorithm to better slot instructions. 
*/ -static void -rotate_two_bundles (dump) +static int +ia64_sched_reorder (dump, sched_verbose, ready, pn_ready, clock_var) FILE *dump; + int sched_verbose; + rtx *ready; + int *pn_ready; + int clock_var; { - if (dump) - fprintf (dump, "// Rotating two bundles.\n"); - - if (sched_data.cur == 0) - return; - - finish_last_head (dump, 0); - if (sched_data.cur > 3) - finish_last_head (dump, 3); - sched_data.cur = 0; - sched_data.first_slot = 0; + return ia64_dfa_sched_reorder (dump, sched_verbose, ready, + pn_ready, clock_var, 0); } -/* We're beginning a new block. Initialize data structures as necessary. */ +/* Like ia64_sched_reorder, but called after issuing each insn. + Override the default sort algorithm to better slot instructions. */ -static void -ia64_sched_init (dump, sched_verbose, max_ready) +static int +ia64_sched_reorder2 (dump, sched_verbose, ready, pn_ready, clock_var) FILE *dump ATTRIBUTE_UNUSED; int sched_verbose ATTRIBUTE_UNUSED; - int max_ready; + rtx *ready; + int *pn_ready; + int clock_var; { - static int initialized = 0; - - if (! 
initialized) - { - int b1, b2, i; - - initialized = 1; - - for (i = b1 = 0; b1 < NR_BUNDLES; b1++) - { - const struct bundle *t1 = bundle + b1; - for (b2 = 0; b2 < NR_BUNDLES; b2++, i++) - { - const struct bundle *t2 = bundle + b2; - - packets[i].t1 = t1; - packets[i].t2 = t2; - } - } - for (i = 0; i < NR_PACKETS; i++) - { - int j; - for (j = 0; j < 3; j++) - packets[i].t[j] = packets[i].t1->t[j]; - for (j = 0; j < 3; j++) - packets[i].t[j + 3] = packets[i].t2->t[j]; - packets[i].first_split = itanium_split_issue (packets + i, 0); - } - - } - - init_insn_group_barriers (); - - memset (&sched_data, 0, sizeof sched_data); - sched_types = (enum attr_type *) xmalloc (max_ready - * sizeof (enum attr_type)); - sched_ready = (rtx *) xmalloc (max_ready * sizeof (rtx)); + if (ia64_tune == PROCESSOR_ITANIUM && reload_completed && last_scheduled_insn) + clocks [INSN_UID (last_scheduled_insn)] = clock_var; + return ia64_dfa_sched_reorder (dump, sched_verbose, ready, pn_ready, + clock_var, 1); } -/* See if the packet P can match the insns we have already scheduled. Return - nonzero if so. In *PSLOT, we store the first slot that is available for - more instructions if we choose this packet. - SPLIT holds the last slot we can use, there's a split issue after it so - scheduling beyond it would cause us to use more than one cycle. */ +/* We are about to issue INSN. Return the number of insns left on the + ready queue that can be issued this cycle. */ static int -packet_matches_p (p, split, pslot) - const struct ia64_packet *p; - int split; - int *pslot; -{ - int filled = sched_data.cur; - int first = sched_data.first_slot; - int i, slot; - - /* First, check if the first of the two bundles must be a specific one (due - to stop bits). */ - if (first > 0 && sched_data.stopbit[0] && p->t1->possible_stop != 1) - return 0; - if (first > 1 && sched_data.stopbit[1] && p->t1->possible_stop != 2) - return 0; - - for (i = 0; i < first; i++) - if (! 
insn_matches_slot (p, sched_data.types[i], i, - sched_data.insns[i])) - return 0; - for (i = slot = first; i < filled; i++) +ia64_variable_issue (dump, sched_verbose, insn, can_issue_more) + FILE *dump ATTRIBUTE_UNUSED; + int sched_verbose ATTRIBUTE_UNUSED; + rtx insn ATTRIBUTE_UNUSED; + int can_issue_more ATTRIBUTE_UNUSED; +{ + last_scheduled_insn = insn; + memcpy (prev_cycle_state, curr_state, dfa_state_size); + if (reload_completed) { - while (slot < split) - { - if (insn_matches_slot (p, sched_data.types[i], slot, - sched_data.insns[i])) - break; - slot++; - } - if (slot == split) - return 0; - slot++; + if (group_barrier_needed_p (insn)) + abort (); + if (GET_CODE (insn) == CALL_INSN) + init_insn_group_barriers (); + stops_p [INSN_UID (insn)] = stop_before_p; + stop_before_p = 0; } - - if (pslot) - *pslot = slot; return 1; } -/* A frontend for itanium_split_issue. For a packet P and a slot - number FIRST that describes the start of the current clock cycle, - return the slot number of the first split issue. This function - uses the cached number found in P if possible. */ +/* We are choosing insn from the ready queue. Return nonzero if INSN + can be chosen. */ static int -get_split (p, first) - const struct ia64_packet *p; - int first; +ia64_first_cycle_multipass_dfa_lookahead_guard (insn) + rtx insn; { - if (first == 0) - return p->first_split; - return itanium_split_issue (p, first); + if (insn == NULL_RTX || !INSN_P (insn)) + abort (); + return (!reload_completed + || !safe_group_barrier_needed_p (insn)); } -/* Given N_READY insns in the array READY, whose types are found in the - corresponding array TYPES, return the insn that is best suited to be - scheduled in slot SLOT of packet P. */ +/* The following variable value is pseudo-insn used by the DFA insn + scheduler to change the DFA state when the simulated clock is + increased. */ + +static rtx dfa_pre_cycle_insn; + +/* We are about to being issuing INSN. 
Return nonzero if we can not + issue it on given cycle CLOCK and return zero if we should not sort + the ready queue on the next clock start. */ static int -find_best_insn (ready, types, n_ready, p, slot) - rtx *ready; - enum attr_type *types; - int n_ready; - const struct ia64_packet *p; - int slot; +ia64_dfa_new_cycle (dump, verbose, insn, last_clock, clock, sort_p) + FILE *dump; + int verbose; + rtx insn; + int last_clock, clock; + int *sort_p; { - int best = -1; - int best_pri = 0; - while (n_ready-- > 0) + int setup_clocks_p = FALSE; + + if (insn == NULL_RTX || !INSN_P (insn)) + abort (); + if ((reload_completed && safe_group_barrier_needed_p (insn)) + || (last_scheduled_insn + && (GET_CODE (last_scheduled_insn) == CALL_INSN + || GET_CODE (PATTERN (last_scheduled_insn)) == ASM_INPUT + || asm_noperands (PATTERN (last_scheduled_insn)) >= 0))) { - rtx insn = ready[n_ready]; - if (! insn) - continue; - if (best >= 0 && INSN_PRIORITY (ready[n_ready]) < best_pri) - break; - /* If we have equally good insns, one of which has a stricter - slot requirement, prefer the one with the stricter requirement. */ - if (best >= 0 && types[n_ready] == TYPE_A) - continue; - if (insn_matches_slot (p, types[n_ready], slot, insn)) + init_insn_group_barriers (); + if (verbose && dump) + fprintf (dump, "// Stop should be before %d%s\n", INSN_UID (insn), + last_clock == clock ? " + cycle advance" : ""); + stop_before_p = 1; + if (last_clock == clock) { - best = n_ready; - best_pri = INSN_PRIORITY (ready[best]); - - /* If there's no way we could get a stricter requirement, stop - looking now. 
*/ - if (types[n_ready] != TYPE_A - && ia64_safe_itanium_requires_unit0 (ready[n_ready])) - break; - break; + state_transition (curr_state, dfa_stop_insn); + if (TARGET_EARLY_STOP_BITS) + *sort_p = (last_scheduled_insn == NULL_RTX + || GET_CODE (last_scheduled_insn) != CALL_INSN); + else + *sort_p = 0; + return 1; + } + else if (reload_completed) + setup_clocks_p = TRUE; + memcpy (curr_state, prev_cycle_state, dfa_state_size); + state_transition (curr_state, dfa_stop_insn); + state_transition (curr_state, dfa_pre_cycle_insn); + state_transition (curr_state, NULL); + } + else if (reload_completed) + setup_clocks_p = TRUE; + if (setup_clocks_p && ia64_tune == PROCESSOR_ITANIUM) + { + enum attr_itanium_class c = ia64_safe_itanium_class (insn); + + if (c != ITANIUM_CLASS_MMMUL && c != ITANIUM_CLASS_MMSHF) + { + rtx link; + int d = -1; + + for (link = LOG_LINKS (insn); link; link = XEXP (link, 1)) + if (REG_NOTE_KIND (link) == 0) + { + enum attr_itanium_class dep_class; + rtx dep_insn = XEXP (link, 0); + + dep_class = ia64_safe_itanium_class (dep_insn); + if ((dep_class == ITANIUM_CLASS_MMMUL + || dep_class == ITANIUM_CLASS_MMSHF) + && last_clock - clocks [INSN_UID (dep_insn)] < 4 + && (d < 0 + || last_clock - clocks [INSN_UID (dep_insn)] < d)) + d = last_clock - clocks [INSN_UID (dep_insn)]; + } + if (d >= 0) + add_cycles [INSN_UID (insn)] = 3 - d; } } - return best; + return 0; } -/* Select the best packet to use given the current scheduler state and the - current ready list. - READY is an array holding N_READY ready insns; TYPES is a corresponding - array that holds their types. Store the best packet in *PPACKET and the - number of insns that can be scheduled in the current cycle in *PBEST. 
*/ + -static void -find_best_packet (pbest, ppacket, ready, types, n_ready) - int *pbest; - const struct ia64_packet **ppacket; - rtx *ready; - enum attr_type *types; - int n_ready; -{ - int first = sched_data.first_slot; - int best = 0; - int lowest_end = 6; - const struct ia64_packet *best_packet = NULL; - int i; +/* The following page contains abstract data `bundle states' which are + used for bundling insns (inserting nops and template generation). */ + +/* The following describes state of insn bundling. */ + +struct bundle_state +{ + /* Unique bundle state number to identify them in the debugging + output */ + int unique_num; + rtx insn; /* corresponding insn, NULL for the 1st and the last state */ + /* number nops before and after the insn */ + short before_nops_num, after_nops_num; + int insn_num; /* insn number (0 - for initial state, 1 - for the 1st + insn */ + int cost; /* cost of the state in cycles */ + int accumulated_insns_num; /* number of all previous insns including + nops. L is considered as 2 insns */ + int branch_deviation; /* deviation of previous branches from 3rd slots */ + struct bundle_state *next; /* next state with the same insn_num */ + struct bundle_state *originator; /* originator (previous insn state) */ + /* All bundle states are in the following chain. */ + struct bundle_state *allocated_states_chain; + /* The DFA State after issuing the insn and the nops. */ + state_t dfa_state; +}; - for (i = 0; i < NR_PACKETS; i++) - { - const struct ia64_packet *p = packets + i; - int slot; - int split = get_split (p, first); - int win = 0; - int first_slot, last_slot; - int b_nops = 0; +/* The following is map insn number to the corresponding bundle state. */ - if (! packet_matches_p (p, split, &first_slot)) - continue; +static struct bundle_state **index_to_bundle_states; - memcpy (sched_ready, ready, n_ready * sizeof (rtx)); +/* The unique number of next bundle state. 
*/ - win = 0; - last_slot = 6; - for (slot = first_slot; slot < split; slot++) - { - int insn_nr; +static int bundle_states_num; - /* Disallow a degenerate case where the first bundle doesn't - contain anything but NOPs! */ - if (first_slot == 0 && win == 0 && slot == 3) - { - win = -1; - break; - } +/* All allocated bundle states are in the following chain. */ - insn_nr = find_best_insn (sched_ready, types, n_ready, p, slot); - if (insn_nr >= 0) - { - sched_ready[insn_nr] = 0; - last_slot = slot; - win++; - } - else if (p->t[slot] == TYPE_B) - b_nops++; - } - /* We must disallow MBB/BBB packets if any of their B slots would be - filled with nops. */ - if (last_slot < 3) - { - if (p->t[1] == TYPE_B && (b_nops || last_slot < 2)) - win = -1; - } - else - { - if (p->t[4] == TYPE_B && (b_nops || last_slot < 5)) - win = -1; - } +static struct bundle_state *allocated_bundle_states_chain; - if (win > best - || (win == best && last_slot < lowest_end)) - { - best = win; - lowest_end = last_slot; - best_packet = p; - } - } - *pbest = best; - *ppacket = best_packet; -} +/* All allocated but not used bundle states are in the following + chain. */ -/* Reorder the ready list so that the insns that can be issued in this cycle - are found in the correct order at the end of the list. - DUMP is the scheduling dump file, or NULL. READY points to the start, - E_READY to the end of the ready list. MAY_FAIL determines what should be - done if no insns can be scheduled in this cycle: if it is zero, we abort, - otherwise we return 0. - Return 1 if any insns can be scheduled in this cycle. 
*/ +static struct bundle_state *free_bundle_state_chain; -static int -itanium_reorder (dump, ready, e_ready, may_fail) - FILE *dump; - rtx *ready; - rtx *e_ready; - int may_fail; -{ - const struct ia64_packet *best_packet; - int n_ready = e_ready - ready; - int first = sched_data.first_slot; - int i, best, best_split, filled; - for (i = 0; i < n_ready; i++) - sched_types[i] = ia64_safe_type (ready[i]); +/* The following function returns a free bundle state. */ - find_best_packet (&best, &best_packet, ready, sched_types, n_ready); +static struct bundle_state * +get_free_bundle_state () +{ + struct bundle_state *result; - if (best == 0) + if (free_bundle_state_chain != NULL) { - if (may_fail) - return 0; - abort (); + result = free_bundle_state_chain; + free_bundle_state_chain = result->next; } - - if (dump) + else { - fprintf (dump, "// Selected bundles: %s %s (%d insns)\n", - best_packet->t1->name, - best_packet->t2 ? best_packet->t2->name : NULL, best); + result = xmalloc (sizeof (struct bundle_state)); + result->dfa_state = xmalloc (dfa_state_size); + result->allocated_states_chain = allocated_bundle_states_chain; + allocated_bundle_states_chain = result; } + result->unique_num = bundle_states_num++; + return result; + +} - best_split = itanium_split_issue (best_packet, first); - packet_matches_p (best_packet, best_split, &filled); +/* The following function frees given bundle state. 
*/ - for (i = filled; i < best_split; i++) - { - int insn_nr; +static void +free_bundle_state (state) + struct bundle_state *state; +{ + state->next = free_bundle_state_chain; + free_bundle_state_chain = state; +} - insn_nr = find_best_insn (ready, sched_types, n_ready, best_packet, i); - if (insn_nr >= 0) - { - rtx insn = ready[insn_nr]; - memmove (ready + insn_nr, ready + insn_nr + 1, - (n_ready - insn_nr - 1) * sizeof (rtx)); - memmove (sched_types + insn_nr, sched_types + insn_nr + 1, - (n_ready - insn_nr - 1) * sizeof (enum attr_type)); - ready[--n_ready] = insn; - } - } +/* Start work with abstract data `bundle states'. */ - sched_data.packet = best_packet; - sched_data.split = best_split; - return 1; +static void +initiate_bundle_states () +{ + bundle_states_num = 0; + free_bundle_state_chain = NULL; + allocated_bundle_states_chain = NULL; } -/* Dump information about the current scheduling state to file DUMP. */ +/* Finish work with abstract data `bundle states'. */ static void -dump_current_packet (dump) - FILE *dump; +finish_bundle_states () { - int i; - fprintf (dump, "// %d slots filled:", sched_data.cur); - for (i = 0; i < sched_data.first_slot; i++) - { - rtx insn = sched_data.insns[i]; - fprintf (dump, " %s", type_names[sched_data.types[i]]); - if (insn) - fprintf (dump, "/%s", type_names[ia64_safe_type (insn)]); - if (sched_data.stopbit[i]) - fprintf (dump, " ;;"); - } - fprintf (dump, " :::"); - for (i = sched_data.first_slot; i < sched_data.cur; i++) + struct bundle_state *curr_state, *next_state; + + for (curr_state = allocated_bundle_states_chain; + curr_state != NULL; + curr_state = next_state) { - rtx insn = sched_data.insns[i]; - enum attr_type t = ia64_safe_type (insn); - fprintf (dump, " (%d) %s", INSN_UID (insn), type_names[t]); + next_state = curr_state->allocated_states_chain; + free (curr_state->dfa_state); + free (curr_state); } - fprintf (dump, "\n"); } -/* Schedule a stop bit. DUMP is the current scheduling dump file, or - NULL. 
*/ +/* Hash table of the bundle states. The key is dfa_state and insn_num + of the bundle states. */ -static void -schedule_stop (dump) - FILE *dump; -{ - const struct ia64_packet *best = sched_data.packet; - int i; - int best_stop = 6; +static htab_t bundle_state_table; - if (dump) - fprintf (dump, "// Stop bit, cur = %d.\n", sched_data.cur); +/* The function returns hash of BUNDLE_STATE. */ - if (sched_data.cur == 0) - { - if (dump) - fprintf (dump, "// At start of bundle, so nothing to do.\n"); - - rotate_two_bundles (NULL); - return; - } +static unsigned +bundle_state_hash (bundle_state) + const void *bundle_state; +{ + const struct bundle_state *state = (struct bundle_state *) bundle_state; + unsigned result, i; - for (i = -1; i < NR_PACKETS; i++) - { - /* This is a slight hack to give the current packet the first chance. - This is done to avoid e.g. switching from MIB to MBB bundles. */ - const struct ia64_packet *p = (i >= 0 ? packets + i : sched_data.packet); - int split = get_split (p, sched_data.first_slot); - const struct bundle *compare; - int next, stoppos; + for (result = i = 0; i < dfa_state_size; i++) + result += (((unsigned char *) state->dfa_state) [i] + << ((i % CHAR_BIT) * 3 + CHAR_BIT)); + return result + state->insn_num; +} - if (! packet_matches_p (p, split, &next)) - continue; +/* The function returns nonzero if the bundle state keys are equal. */ - compare = next > 3 ? 
p->t2 : p->t1; +static int +bundle_state_eq_p (bundle_state_1, bundle_state_2) + const void *bundle_state_1; + const void *bundle_state_2; +{ + const struct bundle_state * state1 = (struct bundle_state *) bundle_state_1; + const struct bundle_state * state2 = (struct bundle_state *) bundle_state_2; - stoppos = 3; - if (compare->possible_stop) - stoppos = compare->possible_stop; - if (next > 3) - stoppos += 3; + return (state1->insn_num == state2->insn_num + && memcmp (state1->dfa_state, state2->dfa_state, + dfa_state_size) == 0); +} - if (stoppos < next || stoppos >= best_stop) - { - if (compare->possible_stop == 0) - continue; - stoppos = (next > 3 ? 6 : 3); - } - if (stoppos < next || stoppos >= best_stop) - continue; +/* The function inserts the BUNDLE_STATE into the hash table. The + function returns nonzero if the bundle has been inserted into the + table. The table contains the best bundle state with given key. */ - if (dump) - fprintf (dump, "// switching from %s %s to %s %s (stop at %d)\n", - best->t1->name, best->t2->name, p->t1->name, p->t2->name, - stoppos); +static int +insert_bundle_state (bundle_state) + struct bundle_state *bundle_state; +{ + void **entry_ptr; - best_stop = stoppos; - best = p; + entry_ptr = htab_find_slot (bundle_state_table, bundle_state, 1); + if (*entry_ptr == NULL) + { + bundle_state->next = index_to_bundle_states [bundle_state->insn_num]; + index_to_bundle_states [bundle_state->insn_num] = bundle_state; + *entry_ptr = (void *) bundle_state; + return TRUE; } - - sched_data.packet = best; - cycle_end_fill_slots (dump); - while (sched_data.cur < best_stop) + else if (bundle_state->cost < ((struct bundle_state *) *entry_ptr)->cost + || (bundle_state->cost == ((struct bundle_state *) *entry_ptr)->cost + && (((struct bundle_state *)*entry_ptr)->accumulated_insns_num + > bundle_state->accumulated_insns_num + || (((struct bundle_state *) + *entry_ptr)->accumulated_insns_num + == bundle_state->accumulated_insns_num + && ((struct 
bundle_state *) + *entry_ptr)->branch_deviation + > bundle_state->branch_deviation)))) + { - sched_data.types[sched_data.cur] = best->t[sched_data.cur]; - sched_data.insns[sched_data.cur] = 0; - sched_data.stopbit[sched_data.cur] = 0; - sched_data.cur++; + struct bundle_state temp; + + temp = *(struct bundle_state *) *entry_ptr; + *(struct bundle_state *) *entry_ptr = *bundle_state; + ((struct bundle_state *) *entry_ptr)->next = temp.next; + *bundle_state = temp; } - sched_data.stopbit[sched_data.cur - 1] = 1; - sched_data.first_slot = best_stop; + return FALSE; +} + +/* Start work with the hash table. */ - if (dump) - dump_current_packet (dump); +static void +initiate_bundle_state_table () +{ + bundle_state_table = htab_create (50, bundle_state_hash, bundle_state_eq_p, + (htab_del) 0); } -/* If necessary, perform one or two rotations on the scheduling state. - This should only be called if we are starting a new cycle. */ +/* Finish work with the hash table. */ static void -maybe_rotate (dump) - FILE *dump; +finish_bundle_state_table () { - cycle_end_fill_slots (dump); - if (sched_data.cur == 6) - rotate_two_bundles (dump); - else if (sched_data.cur >= 3) - rotate_one_bundle (dump); - sched_data.first_slot = sched_data.cur; + htab_delete (bundle_state_table); } -/* The clock cycle when ia64_sched_reorder was last called. */ -static int prev_cycle; + -/* The first insn scheduled in the previous cycle. This is the saved - value of sched_data.first_slot. */ -static int prev_first; +/* The following variable is a insn `nop' used to check bundle states + with different number of inserted nops. */ -/* Emit NOPs to fill the delay between PREV_CYCLE and CLOCK_VAR. Used to - pad out the delay between MM (shifts, etc.) and integer operations. */ +static rtx ia64_nop; -static void -nop_cycles_until (clock_var, dump) - int clock_var; - FILE *dump; +/* The following function tries to issue NOPS_NUM nops for the current + state without advancing processor cycle. 
If it failed, the + function returns FALSE and frees the current state. */ + +static int +try_issue_nops (curr_state, nops_num) + struct bundle_state *curr_state; + int nops_num; { - int prev_clock = prev_cycle; - int cycles_left = clock_var - prev_clock; - bool did_stop = false; + int i; - /* Finish the previous cycle; pad it out with NOPs. */ - if (sched_data.cur == 3) - { - sched_emit_insn (gen_insn_group_barrier (GEN_INT (3))); - did_stop = true; - maybe_rotate (dump); - } - else if (sched_data.cur > 0) - { - int need_stop = 0; - int split = itanium_split_issue (sched_data.packet, prev_first); + for (i = 0; i < nops_num; i++) + if (state_transition (curr_state->dfa_state, ia64_nop) >= 0) + { + free_bundle_state (curr_state); + return FALSE; + } + return TRUE; +} - if (sched_data.cur < 3 && split > 3) - { - split = 3; - need_stop = 1; - } +/* The following function tries to issue INSN for the current + state without advancing processor cycle. If it failed, the + function returns FALSE and frees the current state. */ - if (split > sched_data.cur) - { - int i; - for (i = sched_data.cur; i < split; i++) - { - rtx t = sched_emit_insn (gen_nop_type (sched_data.packet->t[i])); - sched_data.types[i] = sched_data.packet->t[i]; - sched_data.insns[i] = t; - sched_data.stopbit[i] = 0; - } - sched_data.cur = split; - } +static int +try_issue_insn (curr_state, insn) + struct bundle_state *curr_state; + rtx insn; +{ + if (insn && state_transition (curr_state->dfa_state, insn) >= 0) + { + free_bundle_state (curr_state); + return FALSE; + } + return TRUE; +} - if (! 
need_stop && sched_data.cur > 0 && sched_data.cur < 6 - && cycles_left > 1) - { - int i; - for (i = sched_data.cur; i < 6; i++) - { - rtx t = sched_emit_insn (gen_nop_type (sched_data.packet->t[i])); - sched_data.types[i] = sched_data.packet->t[i]; - sched_data.insns[i] = t; - sched_data.stopbit[i] = 0; - } - sched_data.cur = 6; - cycles_left--; - need_stop = 1; - } +/* The following function tries to issue BEFORE_NOPS_NUM nops and INSN + starting with ORIGINATOR without advancing processor cycle. If + TRY_BUNDLE_END_P is TRUE, the function also tries to issue nops to + fill all bundle. If it was successful, the function creates new + bundle state and insert into the hash table and into + `index_to_bundle_states'. */ - if (need_stop || sched_data.cur == 6) +static void +issue_nops_and_insn (originator, before_nops_num, insn, try_bundle_end_p) + struct bundle_state *originator; + int before_nops_num; + rtx insn; + int try_bundle_end_p; +{ + struct bundle_state *curr_state; + + curr_state = get_free_bundle_state (); + memcpy (curr_state->dfa_state, originator->dfa_state, dfa_state_size); + curr_state->insn = insn; + curr_state->insn_num = originator->insn_num + 1; + curr_state->cost = originator->cost; + curr_state->originator = originator; + curr_state->before_nops_num = before_nops_num; + curr_state->after_nops_num = 0; + curr_state->accumulated_insns_num + = originator->accumulated_insns_num + before_nops_num; + curr_state->branch_deviation = originator->branch_deviation; + if (insn == NULL_RTX) + abort (); + else if (INSN_CODE (insn) == CODE_FOR_insn_group_barrier) + { + if (GET_MODE (insn) == TImode) + abort (); + if (!try_issue_nops (curr_state, before_nops_num)) + return; + if (!try_issue_insn (curr_state, insn)) + return; + memcpy (temp_dfa_state, curr_state->dfa_state, dfa_state_size); + if (state_transition (temp_dfa_state, dfa_pre_cycle_insn) >= 0 + && curr_state->accumulated_insns_num % 3 != 0) { - sched_emit_insn (gen_insn_group_barrier (GEN_INT (3))); - 
did_stop = true; + free_bundle_state (curr_state); + return; } - maybe_rotate (dump); } - - cycles_left--; - while (cycles_left > 0) + else if (GET_MODE (insn) != TImode) { - sched_emit_insn (gen_bundle_selector (GEN_INT (0))); - sched_emit_insn (gen_nop_type (TYPE_M)); - sched_emit_insn (gen_nop_type (TYPE_I)); - if (cycles_left > 1) + if (!try_issue_nops (curr_state, before_nops_num)) + return; + if (!try_issue_insn (curr_state, insn)) + return; + if (GET_CODE (PATTERN (insn)) != ASM_INPUT + && asm_noperands (PATTERN (insn)) < 0) + curr_state->accumulated_insns_num++; + if (ia64_safe_type (insn) == TYPE_L) + curr_state->accumulated_insns_num++; + } + else + { + state_transition (curr_state->dfa_state, dfa_pre_cycle_insn); + state_transition (curr_state->dfa_state, NULL); + curr_state->cost++; + if (!try_issue_nops (curr_state, before_nops_num)) + return; + if (!try_issue_insn (curr_state, insn)) + return; + if (GET_CODE (PATTERN (insn)) != ASM_INPUT + && asm_noperands (PATTERN (insn)) < 0) + curr_state->accumulated_insns_num++; + if (ia64_safe_type (insn) == TYPE_L) + curr_state->accumulated_insns_num++; + } + if (ia64_safe_type (insn) == TYPE_B) + curr_state->branch_deviation + += 2 - (curr_state->accumulated_insns_num - 1) % 3; + if (try_bundle_end_p && curr_state->accumulated_insns_num % 3 != 0) + { + if (insert_bundle_state (curr_state)) { - sched_emit_insn (gen_insn_group_barrier (GEN_INT (2))); - cycles_left--; + state_t dfa_state; + struct bundle_state *curr_state1; + struct bundle_state *allocated_states_chain; + + curr_state1 = get_free_bundle_state (); + dfa_state = curr_state1->dfa_state; + allocated_states_chain = curr_state1->allocated_states_chain; + *curr_state1 = *curr_state; + curr_state1->dfa_state = dfa_state; + curr_state1->allocated_states_chain = allocated_states_chain; + memcpy (curr_state1->dfa_state, curr_state->dfa_state, + dfa_state_size); + curr_state = curr_state1; } - sched_emit_insn (gen_nop_type (TYPE_I)); - sched_emit_insn 
(gen_insn_group_barrier (GEN_INT (3))); - did_stop = true; - cycles_left--; + if (!try_issue_nops (curr_state, + 3 - curr_state->accumulated_insns_num % 3)) + return; + curr_state->after_nops_num + = 3 - curr_state->accumulated_insns_num % 3; + curr_state->accumulated_insns_num + += 3 - curr_state->accumulated_insns_num % 3; } + if (!insert_bundle_state (curr_state)) + free_bundle_state (curr_state); + return; +} + +/* The following function returns position in the two window bundle + for given STATE. */ - if (did_stop) - init_insn_group_barriers (); +static int +get_max_pos (state) + state_t state; +{ + if (cpu_unit_reservation_p (state, pos_6)) + return 6; + else if (cpu_unit_reservation_p (state, pos_5)) + return 5; + else if (cpu_unit_reservation_p (state, pos_4)) + return 4; + else if (cpu_unit_reservation_p (state, pos_3)) + return 3; + else if (cpu_unit_reservation_p (state, pos_2)) + return 2; + else if (cpu_unit_reservation_p (state, pos_1)) + return 1; + else + return 0; } -/* We are about to being issuing insns for this clock cycle. - Override the default sort algorithm to better slot instructions. */ +/* The function returns code of a possible template for given position + and state. The function should be called only with 2 values of + position equal to 3 or 6. 
*/ static int -ia64_internal_sched_reorder (dump, sched_verbose, ready, pn_ready, - reorder_type, clock_var) - FILE *dump ATTRIBUTE_UNUSED; - int sched_verbose ATTRIBUTE_UNUSED; - rtx *ready; - int *pn_ready; - int reorder_type, clock_var; +get_template (state, pos) + state_t state; + int pos; { - int n_asms; - int n_ready = *pn_ready; - rtx *e_ready = ready + n_ready; - rtx *insnp; - - if (sched_verbose) + switch (pos) { - fprintf (dump, "// ia64_sched_reorder (type %d):\n", reorder_type); - dump_current_packet (dump); + case 3: + if (cpu_unit_reservation_p (state, _0mii_)) + return 0; + else if (cpu_unit_reservation_p (state, _0mmi_)) + return 1; + else if (cpu_unit_reservation_p (state, _0mfi_)) + return 2; + else if (cpu_unit_reservation_p (state, _0mmf_)) + return 3; + else if (cpu_unit_reservation_p (state, _0bbb_)) + return 4; + else if (cpu_unit_reservation_p (state, _0mbb_)) + return 5; + else if (cpu_unit_reservation_p (state, _0mib_)) + return 6; + else if (cpu_unit_reservation_p (state, _0mmb_)) + return 7; + else if (cpu_unit_reservation_p (state, _0mfb_)) + return 8; + else if (cpu_unit_reservation_p (state, _0mlx_)) + return 9; + else + abort (); + case 6: + if (cpu_unit_reservation_p (state, _1mii_)) + return 0; + else if (cpu_unit_reservation_p (state, _1mmi_)) + return 1; + else if (cpu_unit_reservation_p (state, _1mfi_)) + return 2; + else if (_1mmf_ >= 0 && cpu_unit_reservation_p (state, _1mmf_)) + return 3; + else if (cpu_unit_reservation_p (state, _1bbb_)) + return 4; + else if (cpu_unit_reservation_p (state, _1mbb_)) + return 5; + else if (cpu_unit_reservation_p (state, _1mib_)) + return 6; + else if (cpu_unit_reservation_p (state, _1mmb_)) + return 7; + else if (cpu_unit_reservation_p (state, _1mfb_)) + return 8; + else if (cpu_unit_reservation_p (state, _1mlx_)) + return 9; + else + abort (); + default: + abort (); } +} - /* Work around the pipeline flush that will occurr if the results of - an MM instruction are accessed before the result 
is ready. Intel - documentation says this only happens with IALU, ISHF, ILOG, LD, - and ST consumers, but experimental evidence shows that *any* non-MM - type instruction will incurr the flush. */ - if (reorder_type == 0 && clock_var > 0 && ia64_final_schedule) - { - for (insnp = ready; insnp < e_ready; insnp++) - { - rtx insn = *insnp, link; - enum attr_itanium_class t = ia64_safe_itanium_class (insn); - - if (t == ITANIUM_CLASS_MMMUL - || t == ITANIUM_CLASS_MMSHF - || t == ITANIUM_CLASS_MMSHFI) - continue; +/* The following function returns an insn important for insn bundling + followed by INSN and before TAIL. */ - for (link = LOG_LINKS (insn); link; link = XEXP (link, 1)) - if (REG_NOTE_KIND (link) == 0) - { - rtx other = XEXP (link, 0); - enum attr_itanium_class t0 = ia64_safe_itanium_class (other); - if (t0 == ITANIUM_CLASS_MMSHF || t0 == ITANIUM_CLASS_MMMUL) - { - nop_cycles_until (clock_var, sched_verbose ? dump : NULL); - goto out; - } - } - } - } - out: +static rtx +get_next_important_insn (insn, tail) + rtx insn, tail; +{ + for (; insn && insn != tail; insn = NEXT_INSN (insn)) + if (INSN_P (insn) + && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE + && GET_CODE (PATTERN (insn)) != USE + && GET_CODE (PATTERN (insn)) != CLOBBER) + return insn; + return NULL_RTX; +} - prev_first = sched_data.first_slot; - prev_cycle = clock_var; +/* The following function does insn bundling. Bundling algorithm is + based on dynamic programming. It tries to insert different number of + nop insns before/after the real insns. At the end of EBB, it chooses the + best alternative and then, moving back in EBB, inserts templates for + the best alternative. The algorithm is directed by information + (changes of simulated processor cycle) created by the 2nd insn + scheduling. */ - if (reorder_type == 0) - maybe_rotate (sched_verbose ? 
dump : NULL); +static void +bundling (dump, verbose, prev_head_insn, tail) + FILE *dump; + int verbose; + rtx prev_head_insn, tail; +{ + struct bundle_state *curr_state, *next_state, *best_state; + rtx insn, next_insn; + int insn_num; + int i, bundle_end_p; + int pos, max_pos, template0, template1; + rtx b; + rtx nop; + enum attr_type type; - /* First, move all USEs, CLOBBERs and other crud out of the way. */ - n_asms = 0; - for (insnp = ready; insnp < e_ready; insnp++) - if (insnp < e_ready) + insn_num = 0; + for (insn = NEXT_INSN (prev_head_insn); + insn && insn != tail; + insn = NEXT_INSN (insn)) + if (INSN_P (insn)) + insn_num++; + if (insn_num == 0) + return; + bundling_p = 1; + dfa_clean_insn_cache (); + initiate_bundle_state_table (); + index_to_bundle_states = xmalloc ((insn_num + 2) + * sizeof (struct bundle_state *)); + /* First (forward) pass -- generates states. */ + curr_state = get_free_bundle_state (); + curr_state->insn = NULL; + curr_state->before_nops_num = 0; + curr_state->after_nops_num = 0; + curr_state->insn_num = 0; + curr_state->cost = 0; + curr_state->accumulated_insns_num = 0; + curr_state->branch_deviation = 0; + curr_state->next = NULL; + curr_state->originator = NULL; + state_reset (curr_state->dfa_state); + index_to_bundle_states [0] = curr_state; + insn_num = 0; + for (insn = NEXT_INSN (prev_head_insn); + insn != tail; + insn = NEXT_INSN (insn)) + if (INSN_P (insn) + && (ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IGNORE + || GET_CODE (PATTERN (insn)) == USE + || GET_CODE (PATTERN (insn)) == CLOBBER) + && GET_MODE (insn) == TImode) { - rtx insn = *insnp; - enum attr_type t = ia64_safe_type (insn); - if (t == TYPE_UNKNOWN) - { - if (GET_CODE (PATTERN (insn)) == ASM_INPUT - || asm_noperands (PATTERN (insn)) >= 0) - { - rtx lowest = ready[n_asms]; - ready[n_asms] = insn; - *insnp = lowest; - n_asms++; - } - else - { - rtx highest = ready[n_ready - 1]; - ready[n_ready - 1] = insn; - *insnp = highest; - if (ia64_final_schedule && 
group_barrier_needed_p (insn)) - { - schedule_stop (sched_verbose ? dump : NULL); - sched_data.last_was_stop = 1; - maybe_rotate (sched_verbose ? dump : NULL); - } - - return 1; - } - } + PUT_MODE (insn, VOIDmode); + for (next_insn = NEXT_INSN (insn); + next_insn != tail; + next_insn = NEXT_INSN (next_insn)) + if (INSN_P (next_insn) + && ia64_safe_itanium_class (next_insn) != ITANIUM_CLASS_IGNORE + && GET_CODE (PATTERN (next_insn)) != USE + && GET_CODE (PATTERN (next_insn)) != CLOBBER) + { + PUT_MODE (next_insn, TImode); + break; + } } - if (n_asms < n_ready) + for (insn = get_next_important_insn (NEXT_INSN (prev_head_insn), tail); + insn != NULL_RTX; + insn = next_insn) { - /* Some normal insns to process. Skip the asms. */ - ready += n_asms; - n_ready -= n_asms; - } - else if (n_ready > 0) - { - /* Only asm insns left. */ - if (ia64_final_schedule && group_barrier_needed_p (ready[n_ready - 1])) + if (!INSN_P (insn) + || ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IGNORE + || GET_CODE (PATTERN (insn)) == USE + || GET_CODE (PATTERN (insn)) == CLOBBER) + abort (); + next_insn = get_next_important_insn (NEXT_INSN (insn), tail); + insn_num++; + index_to_bundle_states [insn_num] = NULL; + for (curr_state = index_to_bundle_states [insn_num - 1]; + curr_state != NULL; + curr_state = next_state) { - schedule_stop (sched_verbose ? dump : NULL); - sched_data.last_was_stop = 1; - maybe_rotate (sched_verbose ? dump : NULL); + pos = curr_state->accumulated_insns_num % 3; + type = ia64_safe_type (insn); + next_state = curr_state->next; + bundle_end_p + = (next_insn == NULL_RTX + || (GET_MODE (next_insn) == TImode + && INSN_CODE (insn) != CODE_FOR_insn_group_barrier)); + if (type == TYPE_F || type == TYPE_B || type == TYPE_L + || type == TYPE_S + /* We need to insert 2 Nops for cases like M_MII. 
*/ + || (type == TYPE_M && ia64_tune == PROCESSOR_ITANIUM + && !bundle_end_p && pos == 1)) + issue_nops_and_insn (curr_state, 2, insn, bundle_end_p); + issue_nops_and_insn (curr_state, 1, insn, bundle_end_p); + issue_nops_and_insn (curr_state, 0, insn, bundle_end_p); } - cycle_end_fill_slots (sched_verbose ? dump : NULL); - return 1; + if (index_to_bundle_states [insn_num] == NULL) + abort (); + for (curr_state = index_to_bundle_states [insn_num]; + curr_state != NULL; + curr_state = curr_state->next) + if (verbose >= 2 && dump) + { + struct DFA_chip + { + unsigned short one_automaton_state; + unsigned short oneb_automaton_state; + unsigned short two_automaton_state; + unsigned short twob_automaton_state; + }; + + fprintf + (dump, + "// Bundle state %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, state %d) for %d\n", + curr_state->unique_num, + (curr_state->originator == NULL + ? -1 : curr_state->originator->unique_num), + curr_state->cost, + curr_state->before_nops_num, curr_state->after_nops_num, + curr_state->accumulated_insns_num, curr_state->branch_deviation, + (ia64_tune == PROCESSOR_ITANIUM + ? ((struct DFA_chip *) curr_state->dfa_state)->oneb_automaton_state + : ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state), + INSN_UID (insn)); + } } - - if (ia64_final_schedule) - { - int nr_need_stop = 0; - - for (insnp = ready; insnp < e_ready; insnp++) - if (safe_group_barrier_needed_p (*insnp)) - nr_need_stop++; - - /* Schedule a stop bit if - - all insns require a stop bit, or - - we are starting a new cycle and _any_ insns require a stop bit. - The reason for the latter is that if our schedule is accurate, then - the additional stop won't decrease performance at this point (since - there's a split issue at this point anyway), but it gives us more - freedom when scheduling the currently ready insns. 
*/ - if ((reorder_type == 0 && nr_need_stop) - || (reorder_type == 1 && n_ready == nr_need_stop)) + if (index_to_bundle_states [insn_num] == NULL) + abort (); + /* Finding state with a minimal cost: */ + best_state = NULL; + for (curr_state = index_to_bundle_states [insn_num]; + curr_state != NULL; + curr_state = curr_state->next) + if (curr_state->accumulated_insns_num % 3 == 0 + && (best_state == NULL || best_state->cost > curr_state->cost + || (best_state->cost == curr_state->cost + && (curr_state->accumulated_insns_num + < best_state->accumulated_insns_num + || (curr_state->accumulated_insns_num + == best_state->accumulated_insns_num + && curr_state->branch_deviation + < best_state->branch_deviation))))) + best_state = curr_state; + /* Second (backward) pass: adding nops and templates: */ + insn_num = best_state->before_nops_num; + template0 = template1 = -1; + for (curr_state = best_state; + curr_state->originator != NULL; + curr_state = curr_state->originator) + { + insn = curr_state->insn; + insn_num++; + if (verbose >= 2 && dump) { - schedule_stop (sched_verbose ? dump : NULL); - sched_data.last_was_stop = 1; - maybe_rotate (sched_verbose ? dump : NULL); - if (reorder_type == 1) - return 0; + struct DFA_chip + { + unsigned short one_automaton_state; + unsigned short oneb_automaton_state; + unsigned short two_automaton_state; + unsigned short twob_automaton_state; + }; + + fprintf + (dump, + "// Best %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, state %d) for %d\n", + curr_state->unique_num, + (curr_state->originator == NULL + ? -1 : curr_state->originator->unique_num), + curr_state->cost, + curr_state->before_nops_num, curr_state->after_nops_num, + curr_state->accumulated_insns_num, curr_state->branch_deviation, + (ia64_tune == PROCESSOR_ITANIUM + ? 
((struct DFA_chip *) curr_state->dfa_state)->oneb_automaton_state + : ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state), + INSN_UID (insn)); } - else + max_pos = get_max_pos (curr_state->dfa_state); + if (max_pos == 6 || (max_pos == 3 && template0 < 0)) { - int deleted = 0; - insnp = e_ready; - /* Move down everything that needs a stop bit, preserving relative - order. */ - while (insnp-- > ready + deleted) - while (insnp >= ready + deleted) - { - rtx insn = *insnp; - if (! safe_group_barrier_needed_p (insn)) - break; - memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx)); - *ready = insn; - deleted++; - } - n_ready -= deleted; - ready += deleted; - if (deleted != nr_need_stop) + pos = max_pos; + if (max_pos == 3) + template0 = get_template (curr_state->dfa_state, 3); + else + { + template1 = get_template (curr_state->dfa_state, 3); + template0 = get_template (curr_state->dfa_state, 6); + } + } + if (max_pos > 3 && template1 < 0) + { + if (pos > 3) abort (); + template1 = get_template (curr_state->dfa_state, 3); + pos += 3; + } + for (i = 0; i < curr_state->after_nops_num; i++) + { + nop = gen_nop (); + emit_insn_after (nop, insn); + pos--; + if (pos < 0) + abort (); + if (pos % 3 == 0) + { + if (template0 < 0) + abort (); + b = gen_bundle_selector (GEN_INT (template0)); + ia64_emit_insn_before (b, nop); + template0 = template1; + template1 = -1; + } + } + if (INSN_CODE (insn) != CODE_FOR_insn_group_barrier + && GET_CODE (PATTERN (insn)) != ASM_INPUT + && asm_noperands (PATTERN (insn)) < 0) + pos--; + if (ia64_safe_type (insn) == TYPE_L) + pos--; + if (pos < 0) + abort (); + if (pos % 3 == 0 + && INSN_CODE (insn) != CODE_FOR_insn_group_barrier + && GET_CODE (PATTERN (insn)) != ASM_INPUT + && asm_noperands (PATTERN (insn)) < 0) + { + if (template0 < 0) + abort (); + b = gen_bundle_selector (GEN_INT (template0)); + ia64_emit_insn_before (b, insn); + b = PREV_INSN (insn); + insn = b; + template0 = template1; + template1 = -1; + } + for (i = 0; i 
< curr_state->before_nops_num; i++) + { + nop = gen_nop (); + ia64_emit_insn_before (nop, insn); + nop = PREV_INSN (insn); + insn = nop; + pos--; + if (pos < 0) + abort (); + if (pos % 3 == 0) + { + if (template0 < 0) + abort (); + b = gen_bundle_selector (GEN_INT (template0)); + ia64_emit_insn_before (b, insn); + b = PREV_INSN (insn); + insn = b; + template0 = template1; + template1 = -1; + } } } - - return itanium_reorder (sched_verbose ? dump : NULL, - ready, e_ready, reorder_type == 1); + if (ia64_tune == PROCESSOR_ITANIUM) + /* Insert additional cycles for MM-insns: */ + for (insn = get_next_important_insn (NEXT_INSN (prev_head_insn), tail); + insn != NULL_RTX; + insn = next_insn) + { + if (!INSN_P (insn) + || ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IGNORE + || GET_CODE (PATTERN (insn)) == USE + || GET_CODE (PATTERN (insn)) == CLOBBER) + abort (); + next_insn = get_next_important_insn (NEXT_INSN (insn), tail); + if (INSN_UID (insn) < clocks_length && add_cycles [INSN_UID (insn)]) + { + rtx last; + int i, j, n; + int pred_stop_p; + + last = prev_active_insn (insn); + pred_stop_p = recog_memoized (last) == CODE_FOR_insn_group_barrier; + if (pred_stop_p) + last = prev_active_insn (last); + n = 0; + for (;; last = prev_active_insn (last)) + if (recog_memoized (last) == CODE_FOR_bundle_selector) + { + template0 = XINT (XVECEXP (PATTERN (last), 0, 0), 0); + if (template0 == 9) + PATTERN (last) + = gen_bundle_selector (GEN_INT (2)); /* -> MFI */ + break; + } + else if (recog_memoized (last) != CODE_FOR_insn_group_barrier) + n++; + if ((pred_stop_p && n == 0) || n > 2 + || (template0 == 9 && n != 0)) + abort (); + for (j = 3 - n; j > 0; j --) + ia64_emit_insn_before (gen_nop (), insn); + add_cycles [INSN_UID (insn)]--; + if (!pred_stop_p || add_cycles [INSN_UID (insn)]) + ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), + insn); + if (pred_stop_p) + add_cycles [INSN_UID (insn)]--; + for (i = add_cycles [INSN_UID (insn)]; i > 0; i--) + { + /* Insert 
.MII bundle. */ + ia64_emit_insn_before (gen_bundle_selector (GEN_INT (0)), + insn); + ia64_emit_insn_before (gen_nop (), insn); + ia64_emit_insn_before (gen_nop (), insn); + if (i > 1) + { + ia64_emit_insn_before + (gen_insn_group_barrier (GEN_INT (3)), insn); + i--; + } + ia64_emit_insn_before (gen_nop (), insn); + ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), + insn); + } + ia64_emit_insn_before (gen_bundle_selector (GEN_INT (template0)), + insn); + for (j = n; j > 0; j --) + ia64_emit_insn_before (gen_nop (), insn); + if (pred_stop_p) + ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), + insn); + } + } + free (index_to_bundle_states); + finish_bundle_state_table (); + bundling_p = 0; + dfa_clean_insn_cache (); } -static int -ia64_sched_reorder (dump, sched_verbose, ready, pn_ready, clock_var) +/* The following function is called at the end of scheduling BB or + EBB. After reload, it inserts stop bits and does insn bundling. */ + +static void +ia64_sched_finish (dump, sched_verbose) FILE *dump; int sched_verbose; - rtx *ready; - int *pn_ready; - int clock_var; { - return ia64_internal_sched_reorder (dump, sched_verbose, ready, - pn_ready, 0, clock_var); + if (sched_verbose) + fprintf (dump, "// Finishing schedule.\n"); + if (!reload_completed) + return; + if (reload_completed) + { + final_emit_insn_group_barriers (dump); + bundling (dump, sched_verbose, current_sched_info->prev_head, + current_sched_info->next_tail); + if (sched_verbose && dump) + fprintf (dump, "// finishing %d-%d\n", + INSN_UID (NEXT_INSN (current_sched_info->prev_head)), + INSN_UID (PREV_INSN (current_sched_info->next_tail))); + + return; + } } -/* Like ia64_sched_reorder, but called after issuing each insn. - Override the default sort algorithm to better slot instructions. */ +/* The following function inserts stop bits in scheduled BB or EBB. 
*/ -static int -ia64_sched_reorder2 (dump, sched_verbose, ready, pn_ready, clock_var) +static void +final_emit_insn_group_barriers (dump) FILE *dump ATTRIBUTE_UNUSED; - int sched_verbose ATTRIBUTE_UNUSED; - rtx *ready; - int *pn_ready; - int clock_var; { - if (sched_data.last_was_stop) - return 0; + rtx insn; + int need_barrier_p = 0; + rtx prev_insn = NULL_RTX; - /* Detect one special case and try to optimize it. - If we have 1.M;;MI 2.MIx, and slots 2.1 (M) and 2.2 (I) are both NOPs, - then we can get better code by transforming this to 1.MFB;; 2.MIx. */ - if (sched_data.first_slot == 1 - && sched_data.stopbit[0] - && ((sched_data.cur == 4 - && (sched_data.types[1] == TYPE_M || sched_data.types[1] == TYPE_A) - && (sched_data.types[2] == TYPE_I || sched_data.types[2] == TYPE_A) - && (sched_data.types[3] != TYPE_M && sched_data.types[3] != TYPE_A)) - || (sched_data.cur == 3 - && (sched_data.types[1] == TYPE_M - || sched_data.types[1] == TYPE_A) - && (sched_data.types[2] != TYPE_M - && sched_data.types[2] != TYPE_I - && sched_data.types[2] != TYPE_A)))) - - { - int i, best; - rtx stop = sched_data.insns[1]; + init_insn_group_barriers (); - /* Search backward for the stop bit that must be there. */ - while (1) + for (insn = NEXT_INSN (current_sched_info->prev_head); + insn != current_sched_info->next_tail; + insn = NEXT_INSN (insn)) + { + if (GET_CODE (insn) == BARRIER) { - int insn_code; - - stop = PREV_INSN (stop); - if (GET_CODE (stop) != INSN) - abort (); - insn_code = recog_memoized (stop); - - /* Ignore .pred.rel.mutex. + rtx last = prev_active_insn (insn); - ??? Update this to ignore cycle display notes too - ??? once those are implemented */ - if (insn_code == CODE_FOR_pred_rel_mutex - || insn_code == CODE_FOR_prologue_use) + if (! 
last) continue; + if (GET_CODE (last) == JUMP_INSN + && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC) + last = prev_active_insn (last); + if (recog_memoized (last) != CODE_FOR_insn_group_barrier) + emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last); - if (insn_code == CODE_FOR_insn_group_barrier) - break; - abort (); + init_insn_group_barriers (); + need_barrier_p = 0; + prev_insn = NULL_RTX; } - - /* Adjust the stop bit's slot selector. */ - if (INTVAL (XVECEXP (PATTERN (stop), 0, 0)) != 1) - abort (); - XVECEXP (PATTERN (stop), 0, 0) = GEN_INT (3); - - sched_data.stopbit[0] = 0; - sched_data.stopbit[2] = 1; - - sched_data.types[5] = sched_data.types[3]; - sched_data.types[4] = sched_data.types[2]; - sched_data.types[3] = sched_data.types[1]; - sched_data.insns[5] = sched_data.insns[3]; - sched_data.insns[4] = sched_data.insns[2]; - sched_data.insns[3] = sched_data.insns[1]; - sched_data.stopbit[5] = sched_data.stopbit[4] = sched_data.stopbit[3] = 0; - sched_data.cur += 2; - sched_data.first_slot = 3; - for (i = 0; i < NR_PACKETS; i++) + else if (INSN_P (insn)) { - const struct ia64_packet *p = packets + i; - if (p->t[0] == TYPE_M && p->t[1] == TYPE_F && p->t[2] == TYPE_B) + if (recog_memoized (insn) == CODE_FOR_insn_group_barrier) { - sched_data.packet = p; - break; + init_insn_group_barriers (); + need_barrier_p = 0; + prev_insn = NULL_RTX; } - } - rotate_one_bundle (sched_verbose ? dump : NULL); - - best = 6; - for (i = 0; i < NR_PACKETS; i++) - { - const struct ia64_packet *p = packets + i; - int split = get_split (p, sched_data.first_slot); - int next; - - /* Disallow multiway branches here. 
*/ - if (p->t[1] == TYPE_B) - continue; - - if (packet_matches_p (p, split, &next) && next < best) + else if (need_barrier_p || group_barrier_needed_p (insn)) { - best = next; - sched_data.packet = p; - sched_data.split = split; + if (TARGET_EARLY_STOP_BITS) + { + rtx last; + + for (last = insn; + last != current_sched_info->prev_head; + last = PREV_INSN (last)) + if (INSN_P (last) && GET_MODE (last) == TImode + && stops_p [INSN_UID (last)]) + break; + if (last == current_sched_info->prev_head) + last = insn; + last = prev_active_insn (last); + if (last + && recog_memoized (last) != CODE_FOR_insn_group_barrier) + emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), + last); + init_insn_group_barriers (); + for (last = NEXT_INSN (last); + last != insn; + last = NEXT_INSN (last)) + if (INSN_P (last)) + group_barrier_needed_p (last); + } + else + { + emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), + insn); + init_insn_group_barriers (); + } + group_barrier_needed_p (insn); + prev_insn = NULL_RTX; } + else if (recog_memoized (insn) >= 0) + prev_insn = insn; + need_barrier_p = (GET_CODE (insn) == CALL_INSN + || GET_CODE (PATTERN (insn)) == ASM_INPUT + || asm_noperands (PATTERN (insn)) >= 0); } - if (best == 6) - abort (); } +} - if (*pn_ready > 0) - { - int more = ia64_internal_sched_reorder (dump, sched_verbose, - ready, pn_ready, 1, - clock_var); - if (more) - return more; - /* Did we schedule a stop? If so, finish this cycle. */ - if (sched_data.cur == sched_data.first_slot) - return 0; - } + - if (sched_verbose) - fprintf (dump, "// Can't issue more this cycle; updating type array.\n"); +/* If the following function returns TRUE, we will use the the DFA + insn scheduler. */ - cycle_end_fill_slots (sched_verbose ? dump : NULL); - if (sched_verbose) - dump_current_packet (dump); - return 0; +static int +ia64_use_dfa_pipeline_interface () +{ + return 1; } -/* We are about to issue INSN. 
Return the number of insns left on the - ready queue that can be issued this cycle. */ +/* If the following function returns TRUE, we will use the the DFA + insn scheduler. */ static int -ia64_variable_issue (dump, sched_verbose, insn, can_issue_more) - FILE *dump; - int sched_verbose; - rtx insn; - int can_issue_more ATTRIBUTE_UNUSED; +ia64_first_cycle_multipass_dfa_lookahead () { - enum attr_type t = ia64_safe_type (insn); + return (reload_completed ? 6 : 4); +} - if (sched_data.last_was_stop) - { - int t = sched_data.first_slot; - if (t == 0) - t = 3; - ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (t)), insn); - init_insn_group_barriers (); - sched_data.last_was_stop = 0; - } +/* The following function initiates variable `dfa_pre_cycle_insn'. */ - if (t == TYPE_UNKNOWN) +static void +ia64_init_dfa_pre_cycle_insn () +{ + if (temp_dfa_state == NULL) { - if (sched_verbose) - fprintf (dump, "// Ignoring type %s\n", type_names[t]); - if (GET_CODE (PATTERN (insn)) == ASM_INPUT - || asm_noperands (PATTERN (insn)) >= 0) - { - /* This must be some kind of asm. Clear the scheduling state. */ - rotate_two_bundles (sched_verbose ? dump : NULL); - if (ia64_final_schedule) - group_barrier_needed_p (insn); - } - return 1; + dfa_state_size = state_size (); + temp_dfa_state = xmalloc (dfa_state_size); + prev_cycle_state = xmalloc (dfa_state_size); } + dfa_pre_cycle_insn = make_insn_raw (gen_pre_cycle ()); + PREV_INSN (dfa_pre_cycle_insn) = NEXT_INSN (dfa_pre_cycle_insn) = NULL_RTX; + recog_memoized (dfa_pre_cycle_insn); + dfa_stop_insn = make_insn_raw (gen_insn_group_barrier (GEN_INT (3))); + PREV_INSN (dfa_stop_insn) = NEXT_INSN (dfa_stop_insn) = NULL_RTX; + recog_memoized (dfa_stop_insn); +} - /* This is _not_ just a sanity check. group_barrier_needed_p will update - important state info. Don't delete this test. 
*/ - if (ia64_final_schedule - && group_barrier_needed_p (insn)) - abort (); +/* The following function returns the pseudo insn DFA_PRE_CYCLE_INSN + used by the DFA insn scheduler. */ - sched_data.stopbit[sched_data.cur] = 0; - sched_data.insns[sched_data.cur] = insn; - sched_data.types[sched_data.cur] = t; +static rtx +ia64_dfa_pre_cycle_insn () +{ + return dfa_pre_cycle_insn; +} - sched_data.cur++; - if (sched_verbose) - fprintf (dump, "// Scheduling insn %d of type %s\n", - INSN_UID (insn), type_names[t]); +/* The following function returns TRUE if PRODUCER (of type ilog or + ld) produces address for CONSUMER (of type st or stf). */ - if (GET_CODE (insn) == CALL_INSN && ia64_final_schedule) - { - schedule_stop (sched_verbose ? dump : NULL); - sched_data.last_was_stop = 1; - } +int +ia64_st_address_bypass_p (producer, consumer) + rtx producer; + rtx consumer; +{ + rtx dest, reg, mem; - return 1; + if (producer == NULL_RTX || consumer == NULL_RTX) + abort (); + dest = ia64_single_set (producer); + if (dest == NULL_RTX || (reg = SET_DEST (dest)) == NULL_RTX + || (GET_CODE (reg) != REG && GET_CODE (reg) != SUBREG)) + abort (); + if (GET_CODE (reg) == SUBREG) + reg = SUBREG_REG (reg); + dest = ia64_single_set (consumer); + if (dest == NULL_RTX || (mem = SET_DEST (dest)) == NULL_RTX + || GET_CODE (mem) != MEM) + abort (); + return reg_mentioned_p (reg, mem); } -/* Free data allocated by ia64_sched_init. */ +/* The following function returns TRUE if PRODUCER (of type ilog or + ld) produces address for CONSUMER (of type ld or fld). 
*/ -static void -ia64_sched_finish (dump, sched_verbose) - FILE *dump; - int sched_verbose; +int +ia64_ld_address_bypass_p (producer, consumer) + rtx producer; + rtx consumer; { - if (sched_verbose) - fprintf (dump, "// Finishing schedule.\n"); - rotate_two_bundles (NULL); - free (sched_types); - free (sched_ready); + rtx dest, src, reg, mem; + + if (producer == NULL_RTX || consumer == NULL_RTX) + abort (); + dest = ia64_single_set (producer); + if (dest == NULL_RTX || (reg = SET_DEST (dest)) == NULL_RTX + || (GET_CODE (reg) != REG && GET_CODE (reg) != SUBREG)) + abort (); + if (GET_CODE (reg) == SUBREG) + reg = SUBREG_REG (reg); + src = ia64_single_set (consumer); + if (src == NULL_RTX || (mem = SET_SRC (src)) == NULL_RTX) + abort (); + if (GET_CODE (mem) == UNSPEC && XVECLEN (mem, 0) > 0) + mem = XVECEXP (mem, 0, 0); + while (GET_CODE (mem) == SUBREG || GET_CODE (mem) == ZERO_EXTEND) + mem = XEXP (mem, 0); + if (GET_CODE (mem) != MEM) + abort (); + return reg_mentioned_p (reg, mem); +} + +/* The following function returns TRUE if INSN produces address for a + load/store insn. We will place such insns into M slot because it + decreases its latency time. */ + +int +ia64_produce_address_p (insn) + rtx insn; +{ + return insn->call; } + /* Emit pseudo-ops for the assembler to describe predicate relations. At present this assumes that we only consider predicate pairs to @@ -6887,111 +7006,6 @@ emit_predicate_relation_info () } } -/* Generate a NOP instruction of type T. We will never generate L type - nops. */ - -static rtx -gen_nop_type (t) - enum attr_type t; -{ - switch (t) - { - case TYPE_M: - return gen_nop_m (); - case TYPE_I: - return gen_nop_i (); - case TYPE_B: - return gen_nop_b (); - case TYPE_F: - return gen_nop_f (); - case TYPE_X: - return gen_nop_x (); - default: - abort (); - } -} - -/* After the last scheduling pass, fill in NOPs. It's easier to do this - here than while scheduling. 
*/ - -static void -ia64_emit_nops () -{ - rtx insn; - const struct bundle *b = 0; - int bundle_pos = 0; - - for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) - { - rtx pat; - enum attr_type t; - pat = INSN_P (insn) ? PATTERN (insn) : const0_rtx; - if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER) - continue; - if ((GET_CODE (pat) == UNSPEC && XINT (pat, 1) == UNSPEC_BUNDLE_SELECTOR) - || GET_CODE (insn) == CODE_LABEL) - { - if (b) - while (bundle_pos < 3) - { - emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn); - bundle_pos++; - } - if (GET_CODE (insn) != CODE_LABEL) - b = bundle + INTVAL (XVECEXP (pat, 0, 0)); - else - b = 0; - bundle_pos = 0; - continue; - } - else if (GET_CODE (pat) == UNSPEC_VOLATILE - && XINT (pat, 1) == UNSPECV_INSN_GROUP_BARRIER) - { - int t = INTVAL (XVECEXP (pat, 0, 0)); - if (b) - while (bundle_pos < t) - { - emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn); - bundle_pos++; - } - continue; - } - - if (bundle_pos == 3) - b = 0; - - if (b && INSN_P (insn)) - { - t = ia64_safe_type (insn); - if (asm_noperands (PATTERN (insn)) >= 0 - || GET_CODE (PATTERN (insn)) == ASM_INPUT) - { - while (bundle_pos < 3) - { - emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn); - bundle_pos++; - } - continue; - } - - if (t == TYPE_UNKNOWN) - continue; - while (bundle_pos < 3) - { - if (t == b->t[bundle_pos] - || (t == TYPE_A && (b->t[bundle_pos] == TYPE_M - || b->t[bundle_pos] == TYPE_I))) - break; - - emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn); - bundle_pos++; - } - if (bundle_pos < 3) - bundle_pos++; - } - } -} - /* Perform machine dependent operations on the rtl chain INSNS. 
*/ void @@ -7014,14 +7028,91 @@ ia64_reorg (insns) { timevar_push (TV_SCHED2); ia64_final_schedule = 1; + + initiate_bundle_states (); + ia64_nop = make_insn_raw (gen_nop ()); + PREV_INSN (ia64_nop) = NEXT_INSN (ia64_nop) = NULL_RTX; + recog_memoized (ia64_nop); + clocks_length = get_max_uid () + 1; + stops_p = (char *) xmalloc (clocks_length); + memset (stops_p, 0, clocks_length); + if (ia64_tune == PROCESSOR_ITANIUM) + { + clocks = (int *) xmalloc (clocks_length * sizeof (int)); + memset (clocks, 0, clocks_length * sizeof (int)); + add_cycles = (int *) xmalloc (clocks_length * sizeof (int)); + memset (add_cycles, 0, clocks_length * sizeof (int)); + } + if (ia64_tune == PROCESSOR_ITANIUM2) + { + pos_1 = get_cpu_unit_code ("2_1"); + pos_2 = get_cpu_unit_code ("2_2"); + pos_3 = get_cpu_unit_code ("2_3"); + pos_4 = get_cpu_unit_code ("2_4"); + pos_5 = get_cpu_unit_code ("2_5"); + pos_6 = get_cpu_unit_code ("2_6"); + _0mii_ = get_cpu_unit_code ("2b_0mii."); + _0mmi_ = get_cpu_unit_code ("2b_0mmi."); + _0mfi_ = get_cpu_unit_code ("2b_0mfi."); + _0mmf_ = get_cpu_unit_code ("2b_0mmf."); + _0bbb_ = get_cpu_unit_code ("2b_0bbb."); + _0mbb_ = get_cpu_unit_code ("2b_0mbb."); + _0mib_ = get_cpu_unit_code ("2b_0mib."); + _0mmb_ = get_cpu_unit_code ("2b_0mmb."); + _0mfb_ = get_cpu_unit_code ("2b_0mfb."); + _0mlx_ = get_cpu_unit_code ("2b_0mlx."); + _1mii_ = get_cpu_unit_code ("2b_1mii."); + _1mmi_ = get_cpu_unit_code ("2b_1mmi."); + _1mfi_ = get_cpu_unit_code ("2b_1mfi."); + _1mmf_ = get_cpu_unit_code ("2b_1mmf."); + _1bbb_ = get_cpu_unit_code ("2b_1bbb."); + _1mbb_ = get_cpu_unit_code ("2b_1mbb."); + _1mib_ = get_cpu_unit_code ("2b_1mib."); + _1mmb_ = get_cpu_unit_code ("2b_1mmb."); + _1mfb_ = get_cpu_unit_code ("2b_1mfb."); + _1mlx_ = get_cpu_unit_code ("2b_1mlx."); + } + else + { + pos_1 = get_cpu_unit_code ("1_1"); + pos_2 = get_cpu_unit_code ("1_2"); + pos_3 = get_cpu_unit_code ("1_3"); + pos_4 = get_cpu_unit_code ("1_4"); + pos_5 = get_cpu_unit_code ("1_5"); + pos_6 = 
get_cpu_unit_code ("1_6"); + _0mii_ = get_cpu_unit_code ("1b_0mii."); + _0mmi_ = get_cpu_unit_code ("1b_0mmi."); + _0mfi_ = get_cpu_unit_code ("1b_0mfi."); + _0mmf_ = get_cpu_unit_code ("1b_0mmf."); + _0bbb_ = get_cpu_unit_code ("1b_0bbb."); + _0mbb_ = get_cpu_unit_code ("1b_0mbb."); + _0mib_ = get_cpu_unit_code ("1b_0mib."); + _0mmb_ = get_cpu_unit_code ("1b_0mmb."); + _0mfb_ = get_cpu_unit_code ("1b_0mfb."); + _0mlx_ = get_cpu_unit_code ("1b_0mlx."); + _1mii_ = get_cpu_unit_code ("1b_1mii."); + _1mmi_ = get_cpu_unit_code ("1b_1mmi."); + _1mfi_ = get_cpu_unit_code ("1b_1mfi."); + _1mmf_ = get_cpu_unit_code ("1b_1mmf."); + _1bbb_ = get_cpu_unit_code ("1b_1bbb."); + _1mbb_ = get_cpu_unit_code ("1b_1mbb."); + _1mib_ = get_cpu_unit_code ("1b_1mib."); + _1mmb_ = get_cpu_unit_code ("1b_1mmb."); + _1mfb_ = get_cpu_unit_code ("1b_1mfb."); + _1mlx_ = get_cpu_unit_code ("1b_1mlx."); + } schedule_ebbs (rtl_dump_file); + finish_bundle_states (); + if (ia64_tune == PROCESSOR_ITANIUM) + { + free (add_cycles); + free (clocks); + } + free (stops_p); + emit_insn_group_barriers (rtl_dump_file, insns); + ia64_final_schedule = 0; timevar_pop (TV_SCHED2); - - /* This relies on the NOTE_INSN_BASIC_BLOCK notes to be in the same - place as they were during scheduling. */ - emit_insn_group_barriers (rtl_dump_file, insns); - ia64_emit_nops (); } else emit_all_insn_group_barriers (rtl_dump_file, insns); diff --git a/gcc/config/ia64/ia64.h b/gcc/config/ia64/ia64.h index 091510c387c..970827bbf4d 100644 --- a/gcc/config/ia64/ia64.h +++ b/gcc/config/ia64/ia64.h @@ -95,6 +95,8 @@ extern int target_flags; #define MASK_DWARF2_ASM 0x40000000 /* test dwarf2 line info via gas. */ +#define MASK_EARLY_STOP_BITS 0x00002000 /* tune stop bits for the model. 
*/ + #define TARGET_BIG_ENDIAN (target_flags & MASK_BIG_ENDIAN) #define TARGET_GNU_AS (target_flags & MASK_GNU_AS) @@ -137,6 +139,7 @@ extern int ia64_tls_size; #define TARGET_TLS14 (ia64_tls_size == 14) #define TARGET_TLS22 (ia64_tls_size == 22) #define TARGET_TLS64 (ia64_tls_size == 64) +#define TARGET_EARLY_STOP_BITS (target_flags & MASK_EARLY_STOP_BITS) #define TARGET_HPUX_LD 0 @@ -188,6 +191,10 @@ extern int ia64_tls_size; N_("Enable Dwarf 2 line debug info via GNU as")}, \ { "no-dwarf2-asm", -MASK_DWARF2_ASM, \ N_("Disable Dwarf 2 line debug info via GNU as")}, \ + { "early-stop-bits", MASK_EARLY_STOP_BITS, \ + N_("Enable earlier placing stop bits for better scheduling")}, \ + { "no-early-stop-bits", -MASK_EARLY_STOP_BITS, \ + N_("Disable earlier placing stop bits")}, \ SUBTARGET_SWITCHES \ { "", TARGET_DEFAULT | TARGET_CPU_DEFAULT, \ NULL } \ @@ -213,12 +220,30 @@ extern int ia64_tls_size; extern const char *ia64_fixed_range_string; extern const char *ia64_tls_size_string; + +/* Which processor to schedule for. The cpu attribute defines a list + that mirrors this list, so changes to i64.md must be made at the + same time. */ + +enum processor_type +{ + PROCESSOR_ITANIUM, /* Original Itanium. */ + PROCESSOR_ITANIUM2, + PROCESSOR_max +}; + +extern enum processor_type ia64_tune; + +extern const char *ia64_tune_string; + #define TARGET_OPTIONS \ { \ { "fixed-range=", &ia64_fixed_range_string, \ N_("Specify range of registers to make fixed")}, \ { "tls-size=", &ia64_tls_size_string, \ N_("Specify bit size of immediate TLS offsets")}, \ + { "tune=", &ia64_tune_string, \ + N_("Schedule code for given CPU")}, \ } /* Sometimes certain combinations of command options do not make sense on a @@ -2485,4 +2510,9 @@ enum fetchop_code { #undef PROFILE_BEFORE_PROLOGUE #define PROFILE_BEFORE_PROLOGUE 1 + + +/* Switch on code for querying unit reservations. 
*/ +#define CPU_UNITS_QUERY 1 + /* End of ia64.h */ diff --git a/gcc/config/ia64/ia64.md b/gcc/config/ia64/ia64.md index c2275494c25..a96e92fc900 100644 --- a/gcc/config/ia64/ia64.md +++ b/gcc/config/ia64/ia64.md @@ -91,6 +91,10 @@ ;; :: ;; :::::::::::::::::::: +;; Processor type. This attribute must exactly match the processor_type +;; enumeration in ia64.h. +(define_attr "cpu" "itanium,itanium2" (const (symbol_ref "ia64_tune"))) + ;; Instruction type. This primarily determines how instructions can be ;; packed in bundles, and secondarily affects scheduling to function units. @@ -110,8 +114,8 @@ (define_attr "itanium_class" "unknown,ignore,stop_bit,br,fcmp,fcvtfx,fld, fmac,fmisc,frar_i,frar_m,frbr,frfr,frpr,ialu,icmp,ilog,ishf,ld, chk_s,long_i,mmmul,mmshf,mmshfi,rse_m,scall,sem,stf,st,syst_m0, - syst_m,tbit,toar_i,toar_m,tobr,tofr,topr,xmpy,xtd,nop_b,nop_f, - nop_i,nop_m,nop_x,lfetch" + syst_m,tbit,toar_i,toar_m,tobr,tofr,topr,xmpy,xtd,nop,nop_b,nop_f, + nop_i,nop_m,nop_x,lfetch,pre_cycle" (const_string "unknown")) ;; chk_s has an I and an M form; use type A for convenience. @@ -146,76 +150,23 @@ (define_attr "predicable" "no,yes" (const_string "yes")) -;; :::::::::::::::::::: -;; :: -;; :: Function Units -;; :: -;; :::::::::::::::::::: -;; We define 6 "dummy" functional units. All the real work to decide which -;; insn uses which unit is done by our MD_SCHED_REORDER hooks. We only -;; have to ensure here that there are enough copies of the dummy unit so -;; that the scheduler doesn't get confused by MD_SCHED_REORDER. -;; Other than the 6 dummies for normal insns, we also add a single dummy unit -;; for stop bits. 
- -(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "br") 0 0) -(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "scall") 0 0) -(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "fcmp") 2 0) -(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "fcvtfx") 7 0) -(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "fld") 9 0) -(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "fmac") 5 0) -(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "fmisc") 5 0) - -;; There is only one insn `mov = ar.bsp' for frar_i: -(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "frar_i") 13 0) -;; There is only ony insn `mov = ar.unat' for frar_m: -(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "frar_m") 6 0) -(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "frbr") 2 0) -(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "frfr") 2 0) -(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "frpr") 2 0) - -(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "ialu") 1 0) -(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "icmp") 1 0) -(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "ilog") 1 0) -(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "ishf") 1 0) -(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "ld") 2 0) -(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "long_i") 1 0) -(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "mmmul") 2 0) -(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "mmshf") 2 0) -(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "mmshfi") 2 0) - -;; Now we have only one insn (flushrs) of such class. We assume that flushrs -;; is the 1st syllable of the bundle after stop bit. 
-(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "rse_m") 0 0) -(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "sem") 11 0) -(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "stf") 1 0) -(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "st") 1 0) -(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "syst_m0") 1 0) -;; Now we use only one insn `mf'. Therfore latency time is set up to 0. -(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "syst_m") 0 0) -(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "tbit") 1 0) - -;; There is only one insn `mov ar.pfs =' for toar_i therefore we use -;; latency time equal to 0: -(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "toar_i") 0 0) -;; There are only ony 2 insns `mov ar.ccv =' and `mov ar.unat =' for toar_m: -(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "toar_m") 5 0) -(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "tobr") 1 0) -(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "tofr") 9 0) -(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "topr") 1 0) -(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "xmpy") 7 0) -(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "xtd") 1 0) - -(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "nop_m") 0 0) -(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "nop_i") 0 0) -(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "nop_f") 0 0) -(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "nop_b") 0 0) -(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "nop_x") 0 0) - -(define_function_unit "stop_bit" 1 1 (eq_attr "itanium_class" "stop_bit") 0 0) -(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "ignore") 0 0) -(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "unknown") 0 0) +;; DFA descriptions of ia64 processors used for insn scheduling and +;; bundling. 
+
+(automata_option "ndfa")
+
+;; Uncomment the following line to output automata for debugging.
+;; (automata_option "v")
+
+(automata_option "w")
+
+;;(automata_option "no-minimization")
+
+
+(include "itanium1.md")
+(include "itanium2.md")
+
 ;; ::::::::::::::::::::
 ;; ::
@@ -5089,7 +5040,7 @@
   [(const_int 0)]
   ""
   "nop 0"
-  [(set_attr "itanium_class" "unknown")])
+  [(set_attr "itanium_class" "nop")])
 
 (define_insn "nop_m"
   [(const_int 1)]
@@ -5121,6 +5072,14 @@
   ""
   [(set_attr "itanium_class" "nop_x")])
 
+;; The following insn will never be generated.  It is used only by
+;; the insn scheduler to change state before advancing the cycle.
+(define_insn "pre_cycle"
+  [(const_int 6)]
+  ""
+  ""
+  [(set_attr "itanium_class" "pre_cycle")])
+
 (define_insn "bundle_selector"
   [(unspec [(match_operand 0 "const_int_operand" "")] UNSPEC_BUNDLE_SELECTOR)]
   ""
diff --git a/gcc/config/ia64/itanium1.md b/gcc/config/ia64/itanium1.md
new file mode 100644
index 00000000000..2728ed3de25
--- /dev/null
+++ b/gcc/config/ia64/itanium1.md
@@ -0,0 +1,1616 @@
+;; Itanium1 (original Itanium) DFA descriptions for insn scheduling
+;; and bundling.
+;; Copyright (C) 2002 Free Software Foundation, Inc.
+;; Contributed by Vladimir Makarov <vmakarov@redhat.com>.
+;;
+;; This file is part of GNU CC.
+;;
+;; GNU CC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 2, or (at your option)
+;; any later version.
+;;
+;; GNU CC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GNU CC; see the file COPYING.  If not, write to
+;; the Free Software Foundation, 59 Temple Place - Suite 330,
+;; Boston, MA 02111-1307, USA.
*/ +;; + + +/* This is description of pipeline hazards based on DFA. The + following constructions can be used for this: + + o define_cpu_unit string [string]) describes a cpu functional unit + (separated by comma). + + 1st operand: Names of cpu function units. + 2nd operand: Name of automaton (see comments for + DEFINE_AUTOMATON). + + All define_reservations and define_cpu_units should have unique + names which can not be "nothing". + + o (exclusion_set string string) means that each CPU function unit + in the first string can not be reserved simultaneously with each + unit whose name is in the second string and vise versa. CPU + units in the string are separated by commas. For example, it is + useful for description CPU with fully pipelined floating point + functional unit which can execute simultaneously only single + floating point insns or only double floating point insns. + + o (presence_set string string) means that each CPU function unit in + the first string can not be reserved unless at least one of + pattern of units whose names are in the second string is + reserved. This is an asymmetric relation. CPU units or unit + patterns in the strings are separated by commas. Pattern is one + unit name or unit names separated by white-spaces. + + For example, it is useful for description that slot1 is reserved + after slot0 reservation for a VLIW processor. We could describe + it by the following construction + + (presence_set "slot1" "slot0") + + Or slot1 is reserved only after slot0 and unit b0 reservation. + In this case we could write + + (presence_set "slot1" "slot0 b0") + + All CPU functional units in a set should belong to the same + automaton. + + o (final_presence_set string string) is analogous to + `presence_set'. The difference between them is when checking is + done. When an instruction is issued in given automaton state + reflecting all current and planned unit reservations, the + automaton state is changed. 
The first state is a source state, + the second one is a result state. Checking for `presence_set' is + done on the source state reservation, checking for + `final_presence_set' is done on the result reservation. This + construction is useful to describe a reservation which is + actually two subsequent reservations. For example, if we use + + (presence_set "slot1" "slot0") + + the following insn will be never issued (because slot1 requires + slot0 which is absent in the source state). + + (define_reservation "insn_and_nop" "slot0 + slot1") + + but it can be issued if we use analogous `final_presence_set'. + + o (absence_set string string) means that each CPU function unit in + the first string can be reserved only if each pattern of units + whose names are in the second string is not reserved. This is an + asymmetric relation (actually exclusion set is analogous to this + one but it is symmetric). CPU units or unit patterns in the + string are separated by commas. Pattern is one unit name or unit + names separated by white-spaces. + + For example, it is useful for description that slot0 can not be + reserved after slot1 or slot2 reservation for a VLIW processor. + We could describe it by the following construction + + (absence_set "slot2" "slot0, slot1") + + Or slot2 can not be reserved if slot0 and unit b0 are reserved or + slot1 and unit b1 are reserved . In this case we could write + + (absence_set "slot2" "slot0 b0, slot1 b1") + + All CPU functional units in a set should to belong the same + automaton. + + o (final_absence_set string string) is analogous to `absence_set' but + checking is done on the result (state) reservation. See comments + for final_presence_set. + + o (define_bypass number out_insn_names in_insn_names) names bypass with + given latency (the first number) from insns given by the first + string (see define_insn_reservation) into insns given by the + second string. Insn names in the strings are separated by + commas. 
+
+   o (define_automaton string) describes names of an automaton
+     generated and used for pipeline hazards recognition.  The names
+     are separated by comma.  Actually it is possible to generate the
+     single automaton but unfortunately it can be very large.  If we
+     use more than one automaton, the summary size of the automata
+     usually is less than the single one.  The automaton name is used
+     in define_cpu_unit.  All automata should have unique names.
+
+   o (automata_option string) describes options for generation of
+     automata.  Currently there are the following options:
+
+     o "no-minimization" which makes no minimization of automata.
+       This is only worth doing when we are debugging the description
+       and need to look more accurately at reservations of states.
+
+     o "ndfa" which makes automata with nondeterministic reservation
+        by insns.
+
+   o (define_reservation string string) names reservation (the first
+     string) of cpu functional units (the 2nd string).  Sometimes unit
+     reservations for different insns contain common parts.  In such
+     case, you describe the common part and use its name (the 1st
+     parameter) in a regular expression in define_insn_reservation.  All
+     define_reservations, define results and define_cpu_units should
+     have unique names which can not be "nothing".
+
+   o (define_insn_reservation name default_latency condition regexpr)
+     describes reservation of cpu functional units (the 3rd operand)
+     for instruction which is selected by the condition (the 2nd
+     parameter).  The first parameter is used for output of debugging
+     information.  The reservations are described by a regular
+     expression according to the following syntax:
+
+          regexp = regexp "," oneof
+                 | oneof
+
+          oneof = oneof "|" allof
+                | allof
+
+          allof = allof "+" repeat
+                | repeat
+
+          repeat = element "*" number
+                 | element
+
+          element = cpu_function_name
+                  | reservation_name
+                  | result_name
+                  | "nothing"
+                  | "(" regexp ")"
+
+     1. "," is used for describing start of the next cycle in
+        reservation.
+
+     2.
"|" is used for describing the reservation described by the + first regular expression *or* the reservation described by + the second regular expression *or* etc. + + 3. "+" is used for describing the reservation described by the + first regular expression *and* the reservation described by + the second regular expression *and* etc. + + 4. "*" is used for convinience and simply means sequence in + which the regular expression are repeated NUMBER times with + cycle advancing (see ","). + + 5. cpu function unit name which means reservation. + + 6. reservation name -- see define_reservation. + + 7. string "nothing" means no units reservation. + +*/ + +(define_automaton "one") + +;; All possible combinations of bundles/syllables +(define_cpu_unit "1_0m.ii, 1_0m.mi, 1_0m.fi, 1_0m.mf, 1_0b.bb, 1_0m.bb,\ + 1_0m.ib, 1_0m.mb, 1_0m.fb, 1_0m.lx" "one") +(define_cpu_unit "1_0mi.i, 1_0mm.i, 1_0mf.i, 1_0mm.f, 1_0bb.b, 1_0mb.b,\ + 1_0mi.b, 1_0mm.b, 1_0mf.b, 1_0mlx." "one") +(define_cpu_unit "1_0mii., 1_0mmi., 1_0mfi., 1_0mmf., 1_0bbb., 1_0mbb.,\ + 1_0mib., 1_0mmb., 1_0mfb." "one") + +(define_cpu_unit "1_1m.ii, 1_1m.mi, 1_1m.fi, 1_1b.bb, 1_1m.bb,\ + 1_1m.ib, 1_1m.mb, 1_1m.fb, 1_1m.lx" "one") +(define_cpu_unit "1_1mi.i, 1_1mm.i, 1_1mf.i, 1_1bb.b, 1_1mb.b,\ + 1_1mi.b, 1_1mm.b, 1_1mf.b, 1_1mlx." "one") +(define_cpu_unit "1_1mii., 1_1mmi., 1_1mfi., 1_1bbb., 1_1mbb.,\ + 1_1mib., 1_1mmb., 1_1mfb." 
"one") + +;; Slot 1 +(exclusion_set "1_0m.ii" + "1_0m.mi, 1_0m.fi, 1_0m.mf, 1_0b.bb, 1_0m.bb, 1_0m.ib, 1_0m.mb, 1_0m.fb,\ + 1_0m.lx") +(exclusion_set "1_0m.mi" + "1_0m.fi, 1_0m.mf, 1_0b.bb, 1_0m.bb, 1_0m.ib, 1_0m.mb, 1_0m.fb, 1_0m.lx") +(exclusion_set "1_0m.fi" + "1_0m.mf, 1_0b.bb, 1_0m.bb, 1_0m.ib, 1_0m.mb, 1_0m.fb, 1_0m.lx") +(exclusion_set "1_0m.mf" + "1_0b.bb, 1_0m.bb, 1_0m.ib, 1_0m.mb, 1_0m.fb, 1_0m.lx") +(exclusion_set "1_0b.bb" "1_0m.bb, 1_0m.ib, 1_0m.mb, 1_0m.fb, 1_0m.lx") +(exclusion_set "1_0m.bb" "1_0m.ib, 1_0m.mb, 1_0m.fb, 1_0m.lx") +(exclusion_set "1_0m.ib" "1_0m.mb, 1_0m.fb, 1_0m.lx") +(exclusion_set "1_0m.mb" "1_0m.fb, 1_0m.lx") +(exclusion_set "1_0m.fb" "1_0m.lx") + +;; Slot 2 +(exclusion_set "1_0mi.i" + "1_0mm.i, 1_0mf.i, 1_0mm.f, 1_0bb.b, 1_0mb.b, 1_0mi.b, 1_0mm.b, 1_0mf.b,\ + 1_0mlx.") +(exclusion_set "1_0mm.i" + "1_0mf.i, 1_0mm.f, 1_0bb.b, 1_0mb.b, 1_0mi.b, 1_0mm.b, 1_0mf.b, 1_0mlx.") +(exclusion_set "1_0mf.i" + "1_0mm.f, 1_0bb.b, 1_0mb.b, 1_0mi.b, 1_0mm.b, 1_0mf.b, 1_0mlx.") +(exclusion_set "1_0mm.f" + "1_0bb.b, 1_0mb.b, 1_0mi.b, 1_0mm.b, 1_0mf.b, 1_0mlx.") +(exclusion_set "1_0bb.b" "1_0mb.b, 1_0mi.b, 1_0mm.b, 1_0mf.b, 1_0mlx.") +(exclusion_set "1_0mb.b" "1_0mi.b, 1_0mm.b, 1_0mf.b, 1_0mlx.") +(exclusion_set "1_0mi.b" "1_0mm.b, 1_0mf.b, 1_0mlx.") +(exclusion_set "1_0mm.b" "1_0mf.b, 1_0mlx.") +(exclusion_set "1_0mf.b" "1_0mlx.") + +;; Slot 3 +(exclusion_set "1_0mii." + "1_0mmi., 1_0mfi., 1_0mmf., 1_0bbb., 1_0mbb., 1_0mib., 1_0mmb., 1_0mfb.,\ + 1_0mlx.") +(exclusion_set "1_0mmi." + "1_0mfi., 1_0mmf., 1_0bbb., 1_0mbb., 1_0mib., 1_0mmb., 1_0mfb., 1_0mlx.") +(exclusion_set "1_0mfi." + "1_0mmf., 1_0bbb., 1_0mbb., 1_0mib., 1_0mmb., 1_0mfb., 1_0mlx.") +(exclusion_set "1_0mmf." + "1_0bbb., 1_0mbb., 1_0mib., 1_0mmb., 1_0mfb., 1_0mlx.") +(exclusion_set "1_0bbb." "1_0mbb., 1_0mib., 1_0mmb., 1_0mfb., 1_0mlx.") +(exclusion_set "1_0mbb." "1_0mib., 1_0mmb., 1_0mfb., 1_0mlx.") +(exclusion_set "1_0mib." "1_0mmb., 1_0mfb., 1_0mlx.") +(exclusion_set "1_0mmb." 
"1_0mfb., 1_0mlx.") +(exclusion_set "1_0mfb." "1_0mlx.") + +;; Slot 4 +(exclusion_set "1_1m.ii" + "1_1m.mi, 1_1m.fi, 1_1b.bb, 1_1m.bb, 1_1m.ib, 1_1m.mb, 1_1m.fb, 1_1m.lx") +(exclusion_set "1_1m.mi" + "1_1m.fi, 1_1b.bb, 1_1m.bb, 1_1m.ib, 1_1m.mb, 1_1m.fb, 1_1m.lx") +(exclusion_set "1_1m.fi" + "1_1b.bb, 1_1m.bb, 1_1m.ib, 1_1m.mb, 1_1m.fb, 1_1m.lx") +(exclusion_set "1_1b.bb" "1_1m.bb, 1_1m.ib, 1_1m.mb, 1_1m.fb, 1_1m.lx") +(exclusion_set "1_1m.bb" "1_1m.ib, 1_1m.mb, 1_1m.fb, 1_1m.lx") +(exclusion_set "1_1m.ib" "1_1m.mb, 1_1m.fb, 1_1m.lx") +(exclusion_set "1_1m.mb" "1_1m.fb, 1_1m.lx") +(exclusion_set "1_1m.fb" "1_1m.lx") + +;; Slot 5 +(exclusion_set "1_1mi.i" + "1_1mm.i, 1_1mf.i, 1_1bb.b, 1_1mb.b, 1_1mi.b, 1_1mm.b, 1_1mf.b, 1_1mlx.") +(exclusion_set "1_1mm.i" + "1_1mf.i, 1_1bb.b, 1_1mb.b, 1_1mi.b, 1_1mm.b, 1_1mf.b, 1_1mlx.") +(exclusion_set "1_1mf.i" + "1_1bb.b, 1_1mb.b, 1_1mi.b, 1_1mm.b, 1_1mf.b, 1_1mlx.") +(exclusion_set "1_1bb.b" "1_1mb.b, 1_1mi.b, 1_1mm.b, 1_1mf.b, 1_1mlx.") +(exclusion_set "1_1mb.b" "1_1mi.b, 1_1mm.b, 1_1mf.b, 1_1mlx.") +(exclusion_set "1_1mi.b" "1_1mm.b, 1_1mf.b, 1_1mlx.") +(exclusion_set "1_1mm.b" "1_1mf.b, 1_1mlx.") +(exclusion_set "1_1mf.b" "1_1mlx.") + +;; Slot 6 +(exclusion_set "1_1mii." + "1_1mmi., 1_1mfi., 1_1bbb., 1_1mbb., 1_1mib., 1_1mmb., 1_1mfb., 1_1mlx.") +(exclusion_set "1_1mmi." + "1_1mfi., 1_1bbb., 1_1mbb., 1_1mib., 1_1mmb., 1_1mfb., 1_1mlx.") +(exclusion_set "1_1mfi." + "1_1bbb., 1_1mbb., 1_1mib., 1_1mmb., 1_1mfb., 1_1mlx.") +(exclusion_set "1_1bbb." "1_1mbb., 1_1mib., 1_1mmb., 1_1mfb., 1_1mlx.") +(exclusion_set "1_1mbb." "1_1mib., 1_1mmb., 1_1mfb., 1_1mlx.") +(exclusion_set "1_1mib." "1_1mmb., 1_1mfb., 1_1mlx.") +(exclusion_set "1_1mmb." "1_1mfb., 1_1mlx.") +(exclusion_set "1_1mfb." "1_1mlx.") + +(final_presence_set "1_0mi.i" "1_0m.ii") +(final_presence_set "1_0mii." "1_0mi.i") +(final_presence_set "1_1mi.i" "1_1m.ii") +(final_presence_set "1_1mii." 
"1_1mi.i") + +(final_presence_set "1_0mm.i" "1_0m.mi") +(final_presence_set "1_0mmi." "1_0mm.i") +(final_presence_set "1_1mm.i" "1_1m.mi") +(final_presence_set "1_1mmi." "1_1mm.i") + +(final_presence_set "1_0mf.i" "1_0m.fi") +(final_presence_set "1_0mfi." "1_0mf.i") +(final_presence_set "1_1mf.i" "1_1m.fi") +(final_presence_set "1_1mfi." "1_1mf.i") + +(final_presence_set "1_0mm.f" "1_0m.mf") +(final_presence_set "1_0mmf." "1_0mm.f") + +(final_presence_set "1_0bb.b" "1_0b.bb") +(final_presence_set "1_0bbb." "1_0bb.b") +(final_presence_set "1_1bb.b" "1_1b.bb") +(final_presence_set "1_1bbb." "1_1bb.b") + +(final_presence_set "1_0mb.b" "1_0m.bb") +(final_presence_set "1_0mbb." "1_0mb.b") +(final_presence_set "1_1mb.b" "1_1m.bb") +(final_presence_set "1_1mbb." "1_1mb.b") + +(final_presence_set "1_0mi.b" "1_0m.ib") +(final_presence_set "1_0mib." "1_0mi.b") +(final_presence_set "1_1mi.b" "1_1m.ib") +(final_presence_set "1_1mib." "1_1mi.b") + +(final_presence_set "1_0mm.b" "1_0m.mb") +(final_presence_set "1_0mmb." "1_0mm.b") +(final_presence_set "1_1mm.b" "1_1m.mb") +(final_presence_set "1_1mmb." "1_1mm.b") + +(final_presence_set "1_0mf.b" "1_0m.fb") +(final_presence_set "1_0mfb." "1_0mf.b") +(final_presence_set "1_1mf.b" "1_1m.fb") +(final_presence_set "1_1mfb." "1_1mf.b") + +(final_presence_set "1_0mlx." "1_0m.lx") +(final_presence_set "1_1mlx." "1_1m.lx") + +(final_presence_set + "1_1m.ii,1_1m.mi,1_1m.fi,1_1b.bb,1_1m.bb,1_1m.ib,1_1m.mb,1_1m.fb,1_1m.lx" + "1_0mii.,1_0mmi.,1_0mfi.,1_0mmf.,1_0bbb.,1_0mbb.,1_0mib.,1_0mmb.,1_0mfb.,\ + 1_0mlx.") + +;; Microarchitecture units: +(define_cpu_unit + "1_um0, 1_um1, 1_ui0, 1_ui1, 1_uf0, 1_uf1, 1_ub0, 1_ub1, 1_ub2,\ + 1_unb0, 1_unb1, 1_unb2" "one") + +(exclusion_set "1_ub0" "1_unb0") +(exclusion_set "1_ub1" "1_unb1") +(exclusion_set "1_ub2" "1_unb2") + +;; The following rules are used to decrease number of alternatives. +;; They are consequences of Itanium microarchitecture. 
They also
+;; describe the following rules mentioned in the Itanium
+;; microarchitecture:
+;;   o "MMF: Always splits issue before the first M and after F regardless
+;;     of surrounding bundles and stops".
+;;   o "BBB/MBB: Always splits issue after either of these bundles".
+;;   o "MIB BBB: Split issue after the first bundle in this pair".
+
+(exclusion_set "1_0m.mf,1_0mm.f,1_0mmf."
+   "1_1m.ii,1_1m.mi,1_1m.fi,1_1b.bb,1_1m.bb,1_1m.ib,1_1m.mb,1_1m.fb,1_1m.lx")
+(exclusion_set "1_0b.bb,1_0bb.b,1_0bbb.,1_0m.bb,1_0mb.b,1_0mbb."
+   "1_1m.ii,1_1m.mi,1_1m.fi,1_1b.bb,1_1m.bb,1_1m.ib,1_1m.mb,1_1m.fb,1_1m.lx")
+(exclusion_set "1_0m.ib,1_0mi.b,1_0mib." "1_1b.bb")
+
+;; For exceptions of M, I, B, F insns:
+(define_cpu_unit "1_not_um1, 1_not_ui1, 1_not_uf1" "one")
+
+(final_absence_set "1_not_um1" "1_um1")
+(final_absence_set "1_not_ui1" "1_ui1")
+(final_absence_set "1_not_uf1" "1_uf1")
+
+;;; "MIB/MFB/MMB: Splits issue after any of these bundles unless the
+;;; B-slot contains a nop.b or a brp instruction".
+;;; "The B in an MIB/MFB/MMB bundle disperses to B0 if it is a brp or
+;;; nop.b, otherwise it disperses to B2".
+(final_absence_set
+   "1_1m.ii, 1_1m.mi, 1_1m.fi, 1_1b.bb, 1_1m.bb, 1_1m.ib, 1_1m.mb, 1_1m.fb,\
+    1_1m.lx"
+   "1_0mib. 1_ub2, 1_0mfb. 1_ub2, 1_0mmb. 1_ub2")
+
+;; This is necessary to start new processor cycle when we meet stop bit.
+(define_cpu_unit "1_stop" "one")
+(final_absence_set
+   "1_0m.ii,1_0mi.i,1_0mii.,1_0m.mi,1_0mm.i,1_0mmi.,1_0m.fi,1_0mf.i,1_0mfi.,\
+    1_0m.mf,1_0mm.f,1_0mmf.,1_0b.bb,1_0bb.b,1_0bbb.,1_0m.bb,1_0mb.b,1_0mbb.,\
+    1_0m.ib,1_0mi.b,1_0mib.,1_0m.mb,1_0mm.b,1_0mmb.,1_0m.fb,1_0mf.b,1_0mfb.,\
+    1_0m.lx,1_0mlx., \
+    1_1m.ii,1_1mi.i,1_1mii.,1_1m.mi,1_1mm.i,1_1mmi.,1_1m.fi,1_1mf.i,1_1mfi.,\
+    1_1b.bb,1_1bb.b,1_1bbb.,1_1m.bb,1_1mb.b,1_1mbb.,1_1m.ib,1_1mi.b,1_1mib.,\
+    1_1m.mb,1_1mm.b,1_1mmb.,1_1m.fb,1_1mf.b,1_1mfb.,1_1m.lx,1_1mlx."
+ "1_stop") + +;; M and I instruction is dispersed to the lowest numbered M or I unit +;; not already in use. An I slot in the 3rd position of 2nd bundle is +;; always dispersed to I1 +(final_presence_set "1_um1" "1_um0") +(final_presence_set "1_ui1" "1_ui0, 1_1mii., 1_1mmi., 1_1mfi.") + +;; Insns + +;; M and I instruction is dispersed to the lowest numbered M or I unit +;; not already in use. An I slot in the 3rd position of 2nd bundle is +;; always dispersed to I1 +(define_reservation "1_M0" + "1_0m.ii+1_um0|1_0m.mi+1_um0|1_0mm.i+(1_um0|1_um1)\ + |1_0m.fi+1_um0|1_0m.mf+1_um0|1_0mm.f+1_um1\ + |1_0m.bb+1_um0|1_0m.ib+1_um0|1_0m.mb+1_um0\ + |1_0mm.b+1_um1|1_0m.fb+1_um0|1_0m.lx+1_um0\ + |1_1mm.i+1_um1|1_1mm.b+1_um1\ + |(1_1m.ii|1_1m.mi|1_1m.fi|1_1m.bb|1_1m.ib|1_1m.mb|1_1m.fb|1_1m.lx)\ + +(1_um0|1_um1)") + +(define_reservation "1_M1" + "(1_0mii.+(1_ui0|1_ui1)|1_0mmi.+1_ui0|1_0mfi.+1_ui0\ + |1_0mib.+1_unb0|1_0mfb.+1_unb0|1_0mmb.+1_unb0)\ + +(1_1m.ii|1_1m.mi|1_1m.fi|1_1m.bb|1_1m.ib|1_1m.mb|1_1m.fb|1_1m.lx)\ + +(1_um0|1_um1)") + +(define_reservation "1_M" "1_M0|1_M1") + +;; Exceptions for dispersal rules. +;; "An I slot in the 3rd position of 2nd bundle is always dispersed to I1". +(define_reservation "1_I0" + "1_0mi.i+1_ui0|1_0mii.+(1_ui0|1_ui1)|1_0mmi.+1_ui0|1_0mfi.+1_ui0\ + |1_0mi.b+1_ui0|(1_1mi.i|1_1mi.b)+(1_ui0|1_ui1)\ + |1_1mii.+1_ui1|1_1mmi.+1_ui1|1_1mfi.+1_ui1") + +(define_reservation "1_I1" + "1_0m.ii+1_um0+1_0mi.i+1_ui0|1_0mm.i+(1_um0|1_um1)+1_0mmi.+1_ui0\ + |1_0mf.i+1_uf0+1_0mfi.+1_ui0|1_0m.ib+1_um0+1_0mi.b+1_ui0\ + |(1_1m.ii+(1_um0|1_um1)+1_1mi.i\ + |1_1m.ib+(1_um0|1_um1)+1_1mi.b)+(1_ui0|1_ui1)\ + |1_1mm.i+1_um1+1_1mmi.+1_ui1|1_1mf.i+1_uf1+1_1mfi.+1_ui1") + +(define_reservation "1_I" "1_I0|1_I1") + +;; "An F slot in the 1st bundle disperses to F0". +;; "An F slot in the 2st bundle disperses to F1". 
+(define_reservation "1_F0" + "1_0mf.i+1_uf0|1_0mmf.+1_uf0|1_0mf.b+1_uf0|1_1mf.i+1_uf1|1_1mf.b+1_uf1") + +(define_reservation "1_F1" + "1_0m.fi+1_um0+1_0mf.i+1_uf0|1_0mm.f+(1_um0|1_um1)+1_0mmf.+1_uf0\ + |1_0m.fb+1_um0+1_0mf.b+1_uf0|1_1m.fi+(1_um0|1_um1)+1_1mf.i+1_uf1\ + |1_1m.fb+(1_um0|1_um1)+1_1mf.b+1_uf1") + +(define_reservation "1_F2" + "1_0m.mf+1_um0+1_0mm.f+1_um1+1_0mmf.+1_uf0\ + |(1_0mii.+(1_ui0|1_ui1)|1_0mmi.+1_ui0|1_0mfi.+1_ui0\ + |1_0mib.+1_unb0|1_0mmb.+1_unb0|1_0mfb.+1_unb0)\ + +(1_1m.fi+(1_um0|1_um1)+1_1mf.i+1_uf1\ + |1_1m.fb+(1_um0|1_um1)+1_1mf.b+1_uf1)") + +(define_reservation "1_F" "1_F0|1_F1|1_F2") + +;;; "Each B slot in MBB or BBB bundle disperses to the corresponding B +;;; unit. That is, a B slot in 1st position is despersed to B0. In the +;;; 2nd position it is dispersed to B2". +(define_reservation "1_NB" + "1_0b.bb+1_unb0|1_0bb.b+1_unb1|1_0bbb.+1_unb2\ + |1_0mb.b+1_unb1|1_0mbb.+1_unb2\ + |1_0mib.+1_unb0|1_0mmb.+1_unb0|1_0mfb.+1_unb0\ + |1_1b.bb+1_unb0|1_1bb.b+1_unb1\ + |1_1bbb.+1_unb2|1_1mb.b+1_unb1|1_1mbb.+1_unb2|1_1mib.+1_unb0\ + |1_1mmb.+1_unb0|1_1mfb.+1_unb0") + +(define_reservation "1_B0" + "1_0b.bb+1_ub0|1_0bb.b+1_ub1|1_0bbb.+1_ub2\ + |1_0mb.b+1_ub1|1_0mbb.+1_ub2|1_0mib.+1_ub2\ + |1_0mfb.+1_ub2|1_1b.bb+1_ub0|1_1bb.b+1_ub1\ + |1_1bbb.+1_ub2|1_1mb.b+1_ub1\ + |1_1mib.+1_ub2|1_1mmb.+1_ub2|1_1mfb.+1_ub2") + +(define_reservation "1_B1" + "1_0m.bb+1_um0+1_0mb.b+1_ub1|1_0mi.b+1_ui0+1_0mib.+1_ub2\ + |1_0mf.b+1_uf0+1_0mfb.+1_ub2\ + |(1_0mii.+(1_ui0|1_ui1)|1_0mmi.+1_ui0|1_0mfi.+1_ui0)+1_1b.bb+1_ub0\ + |1_1m.bb+(1_um0|1_um1)+1_1mb.b+1_ub1\ + |1_1mi.b+(1_ui0|1_ui1)+1_1mib.+1_ub2\ + |1_1mm.b+1_um1+1_1mmb.+1_ub2\ + |1_1mf.b+1_uf1+1_1mfb.+1_ub2") + +(define_reservation "1_B" "1_B0|1_B1") + +;; MLX bunlde uses ports equivalent to MFI bundles. 
+(define_reservation "1_L0" "1_0mlx.+1_ui0+1_uf0|1_1mlx.+(1_ui0|1_ui1)+1_uf1") +(define_reservation "1_L1" + "1_0m.lx+1_um0+1_0mlx.+1_ui0+1_uf0\ + |1_1m.lx+(1_um0|1_um1)+1_1mlx.+(1_ui0|1_ui1)+1_uf1") +(define_reservation "1_L2" + "(1_0mii.+(1_ui0|1_ui1)|1_0mmi.+1_ui0|1_0mfi.+1_ui0\ + |1_0mib.+1_unb0|1_0mmb.+1_unb0|1_0mfb.+1_unb0) + +1_1m.lx+(1_um0|1_um1)+1_1mlx.+1_ui1+1_uf1") +(define_reservation "1_L" "1_L0|1_L1|1_L2") + +(define_reservation "1_A" "1_M|1_I") + +(define_insn_reservation "1_stop_bit" 0 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "stop_bit")) + (eq (symbol_ref "bundling_p") (const_int 0))) + "1_stop|1_m0_stop|1_m1_stop|1_mi0_stop|1_mi1_stop") + +(define_insn_reservation "1_br" 0 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "br")) + (eq (symbol_ref "bundling_p") (const_int 0))) "1_B") +(define_insn_reservation "1_scall" 0 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "scall")) + (eq (symbol_ref "bundling_p") (const_int 0))) "1_B") +(define_insn_reservation "1_fcmp" 2 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "fcmp")) + (eq (symbol_ref "bundling_p") (const_int 0))) + "1_F+1_not_uf1") +(define_insn_reservation "1_fcvtfx" 7 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "fcvtfx")) + (eq (symbol_ref "bundling_p") (const_int 0))) "1_F") +(define_insn_reservation "1_fld" 9 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "fld")) + (eq (symbol_ref "bundling_p") (const_int 0))) "1_M") +(define_insn_reservation "1_fmac" 5 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "fmac")) + (eq (symbol_ref "bundling_p") (const_int 0))) "1_F") +(define_insn_reservation "1_fmisc" 5 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "fmisc")) + (eq (symbol_ref "bundling_p") (const_int 0))) + "1_F+1_not_uf1") + +;; There is only one insn `mov = ar.bsp' for frar_i: +(define_insn_reservation "1_frar_i" 13 + (and (and (eq_attr "cpu" 
"itanium") + (eq_attr "itanium_class" "frar_i")) + (eq (symbol_ref "bundling_p") (const_int 0))) + "1_I+1_not_ui1") +;; There is only two insns `mov = ar.unat' or `mov = ar.ccv' for frar_m: +(define_insn_reservation "1_frar_m" 6 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "frar_m")) + (eq (symbol_ref "bundling_p") (const_int 0))) + "1_M+1_not_um1") +(define_insn_reservation "1_frbr" 2 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "frbr")) + (eq (symbol_ref "bundling_p") (const_int 0))) + "1_I+1_not_ui1") +(define_insn_reservation "1_frfr" 2 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "frfr")) + (eq (symbol_ref "bundling_p") (const_int 0))) + "1_M+1_not_um1") +(define_insn_reservation "1_frpr" 2 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "frpr")) + (eq (symbol_ref "bundling_p") (const_int 0))) + "1_I+1_not_ui1") + +(define_insn_reservation "1_ialu" 1 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "ialu")) + (eq (symbol_ref + "bundling_p || ia64_produce_address_p (insn)") + (const_int 0))) + "1_A") +(define_insn_reservation "1_ialu_addr" 1 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "ialu")) + (eq (symbol_ref + "!bundling_p && ia64_produce_address_p (insn)") + (const_int 1))) + "1_M") +(define_insn_reservation "1_icmp" 1 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "icmp")) + (eq (symbol_ref "bundling_p") (const_int 0))) "1_A") +(define_insn_reservation "1_ilog" 1 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "ilog")) + (eq (symbol_ref "bundling_p") (const_int 0))) "1_A") +(define_insn_reservation "1_ishf" 1 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "ishf")) + (eq (symbol_ref "bundling_p") (const_int 0))) + "1_I+1_not_ui1") +(define_insn_reservation "1_ld" 2 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "ld")) + (eq (symbol_ref "bundling_p") (const_int 0))) "1_M") 
+(define_insn_reservation "1_long_i" 1 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "long_i")) + (eq (symbol_ref "bundling_p") (const_int 0))) "1_L") +(define_insn_reservation "1_mmmul" 2 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "mmmul")) + (eq (symbol_ref "bundling_p") (const_int 0))) + "1_I+1_not_ui1") +(define_insn_reservation "1_mmshf" 2 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "mmshf")) + (eq (symbol_ref "bundling_p") (const_int 0))) "1_I") +(define_insn_reservation "1_mmshfi" 1 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "mmshfi")) + (eq (symbol_ref "bundling_p") (const_int 0))) "1_I") + +;; Now we have only one insn (flushrs) of such class. We assume that flushrs +;; is the 1st syllable of the bundle after stop bit. +(define_insn_reservation "1_rse_m" 0 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "rse_m")) + (eq (symbol_ref "bundling_p") (const_int 0))) + "(1_0m.ii|1_0m.mi|1_0m.fi|1_0m.mf|1_0b.bb|1_0m.bb\ + |1_0m.ib|1_0m.mb|1_0m.fb|1_0m.lx)+1_um0") +(define_insn_reservation "1_sem" 0 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "sem")) + (eq (symbol_ref "bundling_p") (const_int 0))) + "1_M+1_not_um1") +(define_insn_reservation "1_stf" 1 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "stf")) + (eq (symbol_ref "bundling_p") (const_int 0))) "1_M") +(define_insn_reservation "1_st" 1 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "st")) + (eq (symbol_ref "bundling_p") (const_int 0))) "1_M") +(define_insn_reservation "1_syst_m0" 0 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "syst_m0")) + (eq (symbol_ref "bundling_p") (const_int 0))) + "1_M+1_not_um1") +(define_insn_reservation "1_syst_m" 0 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "syst_m")) + (eq (symbol_ref "bundling_p") (const_int 0))) "1_M") +(define_insn_reservation "1_tbit" 1 + (and (and (eq_attr "cpu" 
"itanium") + (eq_attr "itanium_class" "tbit")) + (eq (symbol_ref "bundling_p") (const_int 0))) + "1_I+1_not_ui1") + +;; There is only ony insn `mov ar.pfs =' for toar_i: +(define_insn_reservation "1_toar_i" 0 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "toar_i")) + (eq (symbol_ref "bundling_p") (const_int 0))) + "1_I+1_not_ui1") +;; There are only ony 2 insns `mov ar.ccv =' and `mov ar.unat =' for toar_m: +(define_insn_reservation "1_toar_m" 5 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "toar_m")) + (eq (symbol_ref "bundling_p") (const_int 0))) + "1_M+1_not_um1") +(define_insn_reservation "1_tobr" 1 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "tobr")) + (eq (symbol_ref "bundling_p") (const_int 0))) + "1_I+1_not_ui1") +(define_insn_reservation "1_tofr" 9 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "tofr")) + (eq (symbol_ref "bundling_p") (const_int 0))) "1_M") +(define_insn_reservation "1_topr" 1 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "topr")) + (eq (symbol_ref "bundling_p") (const_int 0))) + "1_I+1_not_ui1") +(define_insn_reservation "1_xmpy" 7 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "xmpy")) + (eq (symbol_ref "bundling_p") (const_int 0))) "1_F") +(define_insn_reservation "1_xtd" 1 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "xtd")) + (eq (symbol_ref "bundling_p") (const_int 0))) "1_I") + +(define_insn_reservation "1_chk_s" 0 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "chk_s")) + (eq (symbol_ref "bundling_p") (const_int 0))) "1_A") +(define_insn_reservation "1_lfetch" 0 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "lfetch")) + (eq (symbol_ref "bundling_p") (const_int 0))) "1_M") + +(define_insn_reservation "1_nop_m" 0 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "nop_m")) + (eq (symbol_ref "bundling_p") (const_int 0))) "1_M0") 
+(define_insn_reservation "1_nop_b" 0 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "nop_b")) + (eq (symbol_ref "bundling_p") (const_int 0))) "1_NB") +(define_insn_reservation "1_nop_i" 0 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "nop_i")) + (eq (symbol_ref "bundling_p") (const_int 0))) "1_I0") +(define_insn_reservation "1_nop_f" 0 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "nop_f")) + (eq (symbol_ref "bundling_p") (const_int 0))) "1_F0") +(define_insn_reservation "1_nop_x" 0 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "nop_x")) + (eq (symbol_ref "bundling_p") (const_int 0))) "1_L0") + +;; We assume that there is no insn issued on the same cycle as unknown insn. +(define_cpu_unit "1_empty" "one") +(exclusion_set "1_empty" + "1_0m.ii,1_0m.mi,1_0m.fi,1_0m.mf,1_0b.bb,1_0m.bb,1_0m.ib,1_0m.mb,1_0m.fb,\ + 1_0m.lx") + +(define_insn_reservation "1_unknown" 1 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "unknown")) + (eq (symbol_ref "bundling_p") (const_int 0))) "1_empty") + +(define_insn_reservation "1_nop" 1 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "nop")) + (eq (symbol_ref "bundling_p") (const_int 0))) + "1_M0|1_NB|1_I0|1_F0") + +(define_insn_reservation "1_ignore" 0 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "ignore")) + (eq (symbol_ref "bundling_p") (const_int 0))) "nothing") + + +(define_cpu_unit + "1_0m_bs, 1_0mi_bs, 1_0mm_bs, 1_0mf_bs, 1_0b_bs, 1_0bb_bs, 1_0mb_bs" + "one") +(define_cpu_unit + "1_1m_bs, 1_1mi_bs, 1_1mm_bs, 1_1mf_bs, 1_1b_bs, 1_1bb_bs, 1_1mb_bs" + "one") + +(define_cpu_unit "1_m_cont, 1_mi_cont, 1_mm_cont, 1_mf_cont, 1_mb_cont,\ + 1_b_cont, 1_bb_cont" "one") + +;; For stop in the middle of the bundles. 
+(define_cpu_unit "1_m_stop, 1_m0_stop, 1_m1_stop, 1_0mmi_cont" "one") +(define_cpu_unit "1_mi_stop, 1_mi0_stop, 1_mi1_stop, 1_0mii_cont" "one") + +(final_presence_set "1_0m_bs" + "1_0m.ii, 1_0m.mi, 1_0m.mf, 1_0m.fi, 1_0m.bb,\ + 1_0m.ib, 1_0m.fb, 1_0m.mb, 1_0m.lx") +(final_presence_set "1_1m_bs" + "1_1m.ii, 1_1m.mi, 1_1m.fi, 1_1m.bb, 1_1m.ib, 1_1m.fb, 1_1m.mb,\ + 1_1m.lx") +(final_presence_set "1_0mi_bs" "1_0mi.i, 1_0mi.i") +(final_presence_set "1_1mi_bs" "1_1mi.i, 1_1mi.i") +(final_presence_set "1_0mm_bs" "1_0mm.i, 1_0mm.f, 1_0mm.b") +(final_presence_set "1_1mm_bs" "1_1mm.i, 1_1mm.b") +(final_presence_set "1_0mf_bs" "1_0mf.i, 1_0mf.b") +(final_presence_set "1_1mf_bs" "1_1mf.i, 1_1mf.b") +(final_presence_set "1_0b_bs" "1_0b.bb") +(final_presence_set "1_1b_bs" "1_1b.bb") +(final_presence_set "1_0bb_bs" "1_0bb.b") +(final_presence_set "1_1bb_bs" "1_1bb.b") +(final_presence_set "1_0mb_bs" "1_0mb.b") +(final_presence_set "1_1mb_bs" "1_1mb.b") + +(exclusion_set "1_0m_bs" + "1_0mi.i, 1_0mm.i, 1_0mm.f, 1_0mf.i, 1_0mb.b,\ + 1_0mi.b, 1_0mf.b, 1_0mm.b, 1_0mlx., 1_m0_stop") +(exclusion_set "1_1m_bs" + "1_1mi.i, 1_1mm.i, 1_1mf.i, 1_1mb.b, 1_1mi.b, 1_1mf.b, 1_1mm.b,\ + 1_1mlx., 1_m1_stop") +(exclusion_set "1_0mi_bs" "1_0mii., 1_0mib., 1_mi0_stop") +(exclusion_set "1_1mi_bs" "1_1mii., 1_1mib., 1_mi1_stop") +(exclusion_set "1_0mm_bs" "1_0mmi., 1_0mmf., 1_0mmb.") +(exclusion_set "1_1mm_bs" "1_1mmi., 1_1mmb.") +(exclusion_set "1_0mf_bs" "1_0mfi., 1_0mfb.") +(exclusion_set "1_1mf_bs" "1_1mfi., 1_1mfb.") +(exclusion_set "1_0b_bs" "1_0bb.b") +(exclusion_set "1_1b_bs" "1_1bb.b") +(exclusion_set "1_0bb_bs" "1_0bbb.") +(exclusion_set "1_1bb_bs" "1_1bbb.") +(exclusion_set "1_0mb_bs" "1_0mbb.") +(exclusion_set "1_1mb_bs" "1_1mbb.") + +(exclusion_set + "1_0m_bs, 1_0mi_bs, 1_0mm_bs, 1_0mf_bs, 1_0b_bs, 1_0bb_bs, 1_0mb_bs, + 1_1m_bs, 1_1mi_bs, 1_1mm_bs, 1_1mf_bs, 1_1b_bs, 1_1bb_bs, 1_1mb_bs" + "1_stop") + +(final_presence_set + "1_0mi.i, 1_0mm.i, 1_0mf.i, 1_0mm.f, 1_0mb.b,\ + 1_0mi.b, 1_0mm.b, 
1_0mf.b, 1_0mlx." + "1_m_cont") +(final_presence_set "1_0mii., 1_0mib." "1_mi_cont") +(final_presence_set "1_0mmi., 1_0mmf., 1_0mmb." "1_mm_cont") +(final_presence_set "1_0mfi., 1_0mfb." "1_mf_cont") +(final_presence_set "1_0bb.b" "1_b_cont") +(final_presence_set "1_0bbb." "1_bb_cont") +(final_presence_set "1_0mbb." "1_mb_cont") + +(exclusion_set + "1_0m.ii, 1_0m.mi, 1_0m.fi, 1_0m.mf, 1_0b.bb, 1_0m.bb,\ + 1_0m.ib, 1_0m.mb, 1_0m.fb, 1_0m.lx" + "1_m_cont, 1_mi_cont, 1_mm_cont, 1_mf_cont,\ + 1_mb_cont, 1_b_cont, 1_bb_cont") + +(exclusion_set "1_empty" + "1_m_cont,1_mi_cont,1_mm_cont,1_mf_cont,\ + 1_mb_cont,1_b_cont,1_bb_cont") + +;; For m;mi bundle +(final_presence_set "1_m0_stop" "1_0m.mi") +(final_presence_set "1_0mm.i" "1_0mmi_cont") +(exclusion_set "1_0mmi_cont" + "1_0m.ii, 1_0m.mi, 1_0m.fi, 1_0m.mf, 1_0b.bb, 1_0m.bb,\ + 1_0m.ib, 1_0m.mb, 1_0m.fb, 1_0m.lx") +(exclusion_set "1_m0_stop" "1_0mm.i") +(final_presence_set "1_m1_stop" "1_1m.mi") +(exclusion_set "1_m1_stop" "1_1mm.i") +(final_presence_set "1_m_stop" "1_m0_stop, 1_m1_stop") + +;; For mi;i bundle +(final_presence_set "1_mi0_stop" "1_0mi.i") +(final_presence_set "1_0mii." "1_0mii_cont") +(exclusion_set "1_0mii_cont" + "1_0m.ii, 1_0m.mi, 1_0m.fi, 1_0m.mf, 1_0b.bb, 1_0m.bb,\ + 1_0m.ib, 1_0m.mb, 1_0m.fb, 1_0m.lx") +(exclusion_set "1_mi0_stop" "1_0mii.") +(final_presence_set "1_mi1_stop" "1_1mi.i") +(exclusion_set "1_mi1_stop" "1_1mii.") +(final_presence_set "1_mi_stop" "1_mi0_stop, 1_mi1_stop") + +(final_absence_set + "1_0m.ii,1_0mi.i,1_0mii.,1_0m.mi,1_0mm.i,1_0mmi.,1_0m.fi,1_0mf.i,1_0mfi.,\ + 1_0m.mf,1_0mm.f,1_0mmf.,1_0b.bb,1_0bb.b,1_0bbb.,1_0m.bb,1_0mb.b,1_0mbb.,\ + 1_0m.ib,1_0mi.b,1_0mib.,1_0m.mb,1_0mm.b,1_0mmb.,1_0m.fb,1_0mf.b,1_0mfb.,\ + 1_0m.lx,1_0mlx., \ + 1_1m.ii,1_1mi.i,1_1mii.,1_1m.mi,1_1mm.i,1_1mmi.,1_1m.fi,1_1mf.i,1_1mfi.,\ + 1_1b.bb,1_1bb.b,1_1bbb.,1_1m.bb,1_1mb.b,1_1mbb.,\ + 1_1m.ib,1_1mi.b,1_1mib.,1_1m.mb,1_1mm.b,1_1mmb.,1_1m.fb,1_1mf.b,1_1mfb.,\ + 1_1m.lx,1_1mlx." 
+ "1_m0_stop,1_m1_stop,1_mi0_stop,1_mi1_stop") + +(define_cpu_unit "1_m_cont_only, 1_b_cont_only" "one") +(define_cpu_unit "1_mi_cont_only, 1_mm_cont_only, 1_mf_cont_only" "one") +(define_cpu_unit "1_mb_cont_only, 1_bb_cont_only" "one") + +(final_presence_set "1_m_cont_only" "1_m_cont") +(exclusion_set "1_m_cont_only" + "1_0mi.i, 1_0mm.i, 1_0mf.i, 1_0mm.f, 1_0mb.b,\ + 1_0mi.b, 1_0mm.b, 1_0mf.b, 1_0mlx.") + +(final_presence_set "1_b_cont_only" "1_b_cont") +(exclusion_set "1_b_cont_only" "1_0bb.b") + +(final_presence_set "1_mi_cont_only" "1_mi_cont") +(exclusion_set "1_mi_cont_only" "1_0mii., 1_0mib.") + +(final_presence_set "1_mm_cont_only" "1_mm_cont") +(exclusion_set "1_mm_cont_only" "1_0mmi., 1_0mmf., 1_0mmb.") + +(final_presence_set "1_mf_cont_only" "1_mf_cont") +(exclusion_set "1_mf_cont_only" "1_0mfi., 1_0mfb.") + +(final_presence_set "1_mb_cont_only" "1_mb_cont") +(exclusion_set "1_mb_cont_only" "1_0mbb.") + +(final_presence_set "1_bb_cont_only" "1_bb_cont") +(exclusion_set "1_bb_cont_only" "1_0bbb.") + +(define_insn_reservation "1_pre_cycle" 0 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "pre_cycle")) + (eq (symbol_ref "bundling_p") (const_int 0))) + "(1_0m_bs, 1_m_cont) \ + | (1_0mi_bs, (1_mi_cont|nothing)) \ + | (1_0mm_bs, 1_mm_cont) \ + | (1_0mf_bs, (1_mf_cont|nothing)) \ + | (1_0b_bs, (1_b_cont|nothing)) \ + | (1_0bb_bs, (1_bb_cont|nothing)) \ + | (1_0mb_bs, (1_mb_cont|nothing)) \ + | (1_1m_bs, 1_m_cont) \ + | (1_1mi_bs, (1_mi_cont|nothing)) \ + | (1_1mm_bs, 1_mm_cont) \ + | (1_1mf_bs, (1_mf_cont|nothing)) \ + | (1_1b_bs, (1_b_cont|nothing)) \ + | (1_1bb_bs, (1_bb_cont|nothing)) \ + | (1_1mb_bs, (1_mb_cont|nothing)) \ + | (1_m_cont_only, (1_m_cont|nothing)) \ + | (1_b_cont_only, (1_b_cont|nothing)) \ + | (1_mi_cont_only, (1_mi_cont|nothing)) \ + | (1_mm_cont_only, (1_mm_cont|nothing)) \ + | (1_mf_cont_only, (1_mf_cont|nothing)) \ + | (1_mb_cont_only, (1_mb_cont|nothing)) \ + | (1_bb_cont_only, (1_bb_cont|nothing)) \ + | (1_m_stop, 
(1_0mmi_cont|nothing)) \
+ | (1_mi_stop, (1_0mii_cont|nothing))")
+
+;; Bypasses:
+(define_bypass 1 "1_fcmp" "1_br,1_scall")
+;; ??? I found 7 cycle delay for 1_fmac -> 1_fcmp for Itanium1
+(define_bypass 7 "1_fmac" "1_fmisc,1_fcvtfx,1_xmpy,1_fcmp")
+
+;; ???
+(define_bypass 3 "1_frbr" "1_mmmul,1_mmshf")
+(define_bypass 14 "1_frar_i" "1_mmmul,1_mmshf")
+(define_bypass 7 "1_frar_m" "1_mmmul,1_mmshf")
+
+;; ????
+;; There is only one insn `mov ar.pfs =' for toar_i.
+(define_bypass 0 "1_tobr,1_topr,1_toar_i" "1_br,1_scall")
+
+(define_bypass 3 "1_ialu,1_ialu_addr" "1_mmmul,1_mmshf")
+;; ??? how to describe ialu for I slot only. We use ialu_addr for that
+;;(define_bypass 2 "1_ialu" "1_ld" "ia64_ld_address_bypass_p")
+;; ??? how to describe ialu st/address for I slot only. We use ialu_addr
+;; for that.
+;;(define_bypass 2 "1_ialu" "1_st" "ia64_st_address_bypass_p")
+
+(define_bypass 0 "1_icmp" "1_br,1_scall")
+
+(define_bypass 3 "1_ilog" "1_mmmul,1_mmshf")
+
+(define_bypass 2 "1_ilog,1_xtd" "1_ld" "ia64_ld_address_bypass_p")
+(define_bypass 2 "1_ilog,1_xtd" "1_st" "ia64_st_address_bypass_p")
+
+(define_bypass 3 "1_ld" "1_mmmul,1_mmshf")
+(define_bypass 3 "1_ld" "1_ld" "ia64_ld_address_bypass_p")
+(define_bypass 3 "1_ld" "1_st" "ia64_st_address_bypass_p")
+
+;; Intel docs say only LD, ST, IALU, ILOG, ISHF consumers have latency 4,
+;; but HP engineers say any non-MM operation.
+(define_bypass 4 "1_mmmul,1_mmshf"
+ "1_br,1_fcmp,1_fcvtfx,1_fld,1_fmac,1_fmisc,1_frar_i,1_frar_m,\
+ 1_frbr,1_frfr,1_frpr,1_ialu,1_icmp,1_ilog,1_ishf,1_ld,1_chk_s,\
+ 1_long_i,1_rse_m,1_sem,1_stf,1_st,1_syst_m0,1_syst_m,\
+ 1_tbit,1_toar_i,1_toar_m,1_tobr,1_tofr,1_topr,1_xmpy,1_xtd")
+
+;; ??? how to describe that if scheduled < 4 cycle then latency is 10 cycles.
+;; (define_bypass 10 "1_mmmul,1_mmshf" "1_ialu,1_ilog,1_ishf,1_st,1_ld") + +(define_bypass 0 "1_tbit" "1_br,1_scall") + +(define_bypass 8 "1_tofr" "1_frfr,1_stf") +(define_bypass 7 "1_fmisc,1_fcvtfx,1_fmac,1_xmpy" "1_frfr") +(define_bypass 8 "1_fmisc,1_fcvtfx,1_fmac,1_xmpy" "1_stf") + +;; We don't use here fcmp because scall may be predicated. +(define_bypass 0 "1_fcvtfx,1_fld,1_fmac,1_fmisc,1_frar_i,1_frar_m,\ + 1_frbr,1_frfr,1_frpr,1_ialu,1_ialu_addr,1_ilog,1_ishf,\ + 1_ld,1_long_i,1_mmmul,1_mmshf,1_mmshfi,1_toar_m,1_tofr,\ + 1_xmpy,1_xtd" "1_scall") + +(define_bypass 0 "1_unknown,1_ignore,1_stop_bit,1_br,1_fcmp,1_fcvtfx,\ + 1_fld,1_fmac,1_fmisc,1_frar_i,1_frar_m,1_frbr,1_frfr,\ + 1_frpr,1_ialu,1_ialu_addr,1_icmp,1_ilog,1_ishf,1_ld,\ + 1_chk_s,1_long_i,1_mmmul,1_mmshf,1_mmshfi,1_nop,\ + 1_nop_b,1_nop_f,1_nop_i,1_nop_m,1_nop_x,1_rse_m,1_scall,\ + 1_sem,1_stf,1_st,1_syst_m0,1_syst_m,1_tbit,1_toar_i,\ + 1_toar_m,1_tobr,1_tofr,1_topr,1_xmpy,1_xtd,1_lfetch" + "1_ignore") + + +;; Bundling + +(define_automaton "oneb") + +;; Pseudo units for quicker searching for position in two packet window. */ +(define_query_cpu_unit "1_1,1_2,1_3,1_4,1_5,1_6" "oneb") + +;; All possible combinations of bundles/syllables +(define_cpu_unit + "1b_0m.ii, 1b_0m.mi, 1b_0m.fi, 1b_0m.mf, 1b_0b.bb, 1b_0m.bb,\ + 1b_0m.ib, 1b_0m.mb, 1b_0m.fb, 1b_0m.lx" "oneb") +(define_cpu_unit + "1b_0mi.i, 1b_0mm.i, 1b_0mf.i, 1b_0mm.f, 1b_0bb.b, 1b_0mb.b,\ + 1b_0mi.b, 1b_0mm.b, 1b_0mf.b" "oneb") +(define_query_cpu_unit + "1b_0mii., 1b_0mmi., 1b_0mfi., 1b_0mmf., 1b_0bbb., 1b_0mbb.,\ + 1b_0mib., 1b_0mmb., 1b_0mfb., 1b_0mlx." "oneb") + +(define_cpu_unit "1b_1m.ii, 1b_1m.mi, 1b_1m.fi, 1b_1b.bb, 1b_1m.bb,\ + 1b_1m.ib, 1b_1m.mb, 1b_1m.fb, 1b_1m.lx" "oneb") +(define_cpu_unit "1b_1mi.i, 1b_1mm.i, 1b_1mf.i, 1b_1bb.b, 1b_1mb.b,\ + 1b_1mi.b, 1b_1mm.b, 1b_1mf.b" "oneb") +(define_query_cpu_unit "1b_1mii., 1b_1mmi., 1b_1mfi., 1b_1bbb., 1b_1mbb.,\ + 1b_1mib., 1b_1mmb., 1b_1mfb., 1b_1mlx." 
"oneb") + +;; Slot 1 +(exclusion_set "1b_0m.ii" + "1b_0m.mi, 1b_0m.fi, 1b_0m.mf, 1b_0b.bb, 1b_0m.bb,\ + 1b_0m.ib, 1b_0m.mb, 1b_0m.fb, 1b_0m.lx") +(exclusion_set "1b_0m.mi" + "1b_0m.fi, 1b_0m.mf, 1b_0b.bb, 1b_0m.bb, 1b_0m.ib,\ + 1b_0m.mb, 1b_0m.fb, 1b_0m.lx") +(exclusion_set "1b_0m.fi" + "1b_0m.mf, 1b_0b.bb, 1b_0m.bb, 1b_0m.ib, 1b_0m.mb, 1b_0m.fb, 1b_0m.lx") +(exclusion_set "1b_0m.mf" + "1b_0b.bb, 1b_0m.bb, 1b_0m.ib, 1b_0m.mb, 1b_0m.fb, 1b_0m.lx") +(exclusion_set "1b_0b.bb" "1b_0m.bb, 1b_0m.ib, 1b_0m.mb, 1b_0m.fb, 1b_0m.lx") +(exclusion_set "1b_0m.bb" "1b_0m.ib, 1b_0m.mb, 1b_0m.fb, 1b_0m.lx") +(exclusion_set "1b_0m.ib" "1b_0m.mb, 1b_0m.fb, 1b_0m.lx") +(exclusion_set "1b_0m.mb" "1b_0m.fb, 1b_0m.lx") +(exclusion_set "1b_0m.fb" "1b_0m.lx") + +;; Slot 2 +(exclusion_set "1b_0mi.i" + "1b_0mm.i, 1b_0mf.i, 1b_0mm.f, 1b_0bb.b, 1b_0mb.b,\ + 1b_0mi.b, 1b_0mm.b, 1b_0mf.b, 1b_0mlx.") +(exclusion_set "1b_0mm.i" + "1b_0mf.i, 1b_0mm.f, 1b_0bb.b, 1b_0mb.b,\ + 1b_0mi.b, 1b_0mm.b, 1b_0mf.b, 1b_0mlx.") +(exclusion_set "1b_0mf.i" + "1b_0mm.f, 1b_0bb.b, 1b_0mb.b, 1b_0mi.b, 1b_0mm.b, 1b_0mf.b, 1b_0mlx.") +(exclusion_set "1b_0mm.f" + "1b_0bb.b, 1b_0mb.b, 1b_0mi.b, 1b_0mm.b, 1b_0mf.b, 1b_0mlx.") +(exclusion_set "1b_0bb.b" "1b_0mb.b, 1b_0mi.b, 1b_0mm.b, 1b_0mf.b, 1b_0mlx.") +(exclusion_set "1b_0mb.b" "1b_0mi.b, 1b_0mm.b, 1b_0mf.b, 1b_0mlx.") +(exclusion_set "1b_0mi.b" "1b_0mm.b, 1b_0mf.b, 1b_0mlx.") +(exclusion_set "1b_0mm.b" "1b_0mf.b, 1b_0mlx.") +(exclusion_set "1b_0mf.b" "1b_0mlx.") + +;; Slot 3 +(exclusion_set "1b_0mii." + "1b_0mmi., 1b_0mfi., 1b_0mmf., 1b_0bbb., 1b_0mbb.,\ + 1b_0mib., 1b_0mmb., 1b_0mfb., 1b_0mlx.") +(exclusion_set "1b_0mmi." + "1b_0mfi., 1b_0mmf., 1b_0bbb., 1b_0mbb.,\ + 1b_0mib., 1b_0mmb., 1b_0mfb., 1b_0mlx.") +(exclusion_set "1b_0mfi." + "1b_0mmf., 1b_0bbb., 1b_0mbb., 1b_0mib., 1b_0mmb., 1b_0mfb., 1b_0mlx.") +(exclusion_set "1b_0mmf." + "1b_0bbb., 1b_0mbb., 1b_0mib., 1b_0mmb., 1b_0mfb., 1b_0mlx.") +(exclusion_set "1b_0bbb." 
"1b_0mbb., 1b_0mib., 1b_0mmb., 1b_0mfb., 1b_0mlx.") +(exclusion_set "1b_0mbb." "1b_0mib., 1b_0mmb., 1b_0mfb., 1b_0mlx.") +(exclusion_set "1b_0mib." "1b_0mmb., 1b_0mfb., 1b_0mlx.") +(exclusion_set "1b_0mmb." "1b_0mfb., 1b_0mlx.") +(exclusion_set "1b_0mfb." "1b_0mlx.") + +;; Slot 4 +(exclusion_set "1b_1m.ii" + "1b_1m.mi, 1b_1m.fi, 1b_1b.bb, 1b_1m.bb,\ + 1b_1m.ib, 1b_1m.mb, 1b_1m.fb, 1b_1m.lx") +(exclusion_set "1b_1m.mi" + "1b_1m.fi, 1b_1b.bb, 1b_1m.bb, 1b_1m.ib, 1b_1m.mb, 1b_1m.fb, 1b_1m.lx") +(exclusion_set "1b_1m.fi" + "1b_1b.bb, 1b_1m.bb, 1b_1m.ib, 1b_1m.mb, 1b_1m.fb, 1b_1m.lx") +(exclusion_set "1b_1b.bb" "1b_1m.bb, 1b_1m.ib, 1b_1m.mb, 1b_1m.fb, 1b_1m.lx") +(exclusion_set "1b_1m.bb" "1b_1m.ib, 1b_1m.mb, 1b_1m.fb, 1b_1m.lx") +(exclusion_set "1b_1m.ib" "1b_1m.mb, 1b_1m.fb, 1b_1m.lx") +(exclusion_set "1b_1m.mb" "1b_1m.fb, 1b_1m.lx") +(exclusion_set "1b_1m.fb" "1b_1m.lx") + +;; Slot 5 +(exclusion_set "1b_1mi.i" + "1b_1mm.i, 1b_1mf.i, 1b_1bb.b, 1b_1mb.b,\ + 1b_1mi.b, 1b_1mm.b, 1b_1mf.b, 1b_1mlx.") +(exclusion_set "1b_1mm.i" + "1b_1mf.i, 1b_1bb.b, 1b_1mb.b, 1b_1mi.b, 1b_1mm.b, 1b_1mf.b, 1b_1mlx.") +(exclusion_set "1b_1mf.i" + "1b_1bb.b, 1b_1mb.b, 1b_1mi.b, 1b_1mm.b, 1b_1mf.b, 1b_1mlx.") +(exclusion_set "1b_1bb.b" "1b_1mb.b, 1b_1mi.b, 1b_1mm.b, 1b_1mf.b, 1b_1mlx.") +(exclusion_set "1b_1mb.b" "1b_1mi.b, 1b_1mm.b, 1b_1mf.b, 1b_1mlx.") +(exclusion_set "1b_1mi.b" "1b_1mm.b, 1b_1mf.b, 1b_1mlx.") +(exclusion_set "1b_1mm.b" "1b_1mf.b, 1b_1mlx.") +(exclusion_set "1b_1mf.b" "1b_1mlx.") + +;; Slot 6 +(exclusion_set "1b_1mii." + "1b_1mmi., 1b_1mfi., 1b_1bbb., 1b_1mbb.,\ + 1b_1mib., 1b_1mmb., 1b_1mfb., 1b_1mlx.") +(exclusion_set "1b_1mmi." + "1b_1mfi., 1b_1bbb., 1b_1mbb., 1b_1mib., 1b_1mmb., 1b_1mfb., 1b_1mlx.") +(exclusion_set "1b_1mfi." + "1b_1bbb., 1b_1mbb., 1b_1mib., 1b_1mmb., 1b_1mfb., 1b_1mlx.") +(exclusion_set "1b_1bbb." "1b_1mbb., 1b_1mib., 1b_1mmb., 1b_1mfb., 1b_1mlx.") +(exclusion_set "1b_1mbb." "1b_1mib., 1b_1mmb., 1b_1mfb., 1b_1mlx.") +(exclusion_set "1b_1mib." 
"1b_1mmb., 1b_1mfb., 1b_1mlx.") +(exclusion_set "1b_1mmb." "1b_1mfb., 1b_1mlx.") +(exclusion_set "1b_1mfb." "1b_1mlx.") + +(final_presence_set "1b_0mi.i" "1b_0m.ii") +(final_presence_set "1b_0mii." "1b_0mi.i") +(final_presence_set "1b_1mi.i" "1b_1m.ii") +(final_presence_set "1b_1mii." "1b_1mi.i") + +(final_presence_set "1b_0mm.i" "1b_0m.mi") +(final_presence_set "1b_0mmi." "1b_0mm.i") +(final_presence_set "1b_1mm.i" "1b_1m.mi") +(final_presence_set "1b_1mmi." "1b_1mm.i") + +(final_presence_set "1b_0mf.i" "1b_0m.fi") +(final_presence_set "1b_0mfi." "1b_0mf.i") +(final_presence_set "1b_1mf.i" "1b_1m.fi") +(final_presence_set "1b_1mfi." "1b_1mf.i") + +(final_presence_set "1b_0mm.f" "1b_0m.mf") +(final_presence_set "1b_0mmf." "1b_0mm.f") + +(final_presence_set "1b_0bb.b" "1b_0b.bb") +(final_presence_set "1b_0bbb." "1b_0bb.b") +(final_presence_set "1b_1bb.b" "1b_1b.bb") +(final_presence_set "1b_1bbb." "1b_1bb.b") + +(final_presence_set "1b_0mb.b" "1b_0m.bb") +(final_presence_set "1b_0mbb." "1b_0mb.b") +(final_presence_set "1b_1mb.b" "1b_1m.bb") +(final_presence_set "1b_1mbb." "1b_1mb.b") + +(final_presence_set "1b_0mi.b" "1b_0m.ib") +(final_presence_set "1b_0mib." "1b_0mi.b") +(final_presence_set "1b_1mi.b" "1b_1m.ib") +(final_presence_set "1b_1mib." "1b_1mi.b") + +(final_presence_set "1b_0mm.b" "1b_0m.mb") +(final_presence_set "1b_0mmb." "1b_0mm.b") +(final_presence_set "1b_1mm.b" "1b_1m.mb") +(final_presence_set "1b_1mmb." "1b_1mm.b") + +(final_presence_set "1b_0mf.b" "1b_0m.fb") +(final_presence_set "1b_0mfb." "1b_0mf.b") +(final_presence_set "1b_1mf.b" "1b_1m.fb") +(final_presence_set "1b_1mfb." "1b_1mf.b") + +(final_presence_set "1b_0mlx." "1b_0m.lx") +(final_presence_set "1b_1mlx." 
"1b_1m.lx") + +(final_presence_set + "1b_1m.ii,1b_1m.mi,1b_1m.fi,1b_1b.bb,1b_1m.bb,\ + 1b_1m.ib,1b_1m.mb,1b_1m.fb,1b_1m.lx" + "1b_0mii.,1b_0mmi.,1b_0mfi.,1b_0mmf.,1b_0bbb.,1b_0mbb.,\ + 1b_0mib.,1b_0mmb.,1b_0mfb.,1b_0mlx.") + +;; Microarchitecture units: +(define_cpu_unit + "1b_um0, 1b_um1, 1b_ui0, 1b_ui1, 1b_uf0, 1b_uf1, 1b_ub0, 1b_ub1, 1b_ub2,\ + 1b_unb0, 1b_unb1, 1b_unb2" "oneb") + +(exclusion_set "1b_ub0" "1b_unb0") +(exclusion_set "1b_ub1" "1b_unb1") +(exclusion_set "1b_ub2" "1b_unb2") + +;; The following rules are used to decrease number of alternatives. +;; They are consequences of Itanium microarchitecture. They also +;; describe the following rules mentioned in Itanium +;; microarchitecture: rules mentioned in Itanium microarchitecture: +;; o "MMF: Always splits issue before the first M and after F regardless +;; of surrounding bundles and stops". +;; o "BBB/MBB: Always splits issue after either of these bundles". +;; o "MIB BBB: Split issue after the first bundle in this pair". + +(exclusion_set "1b_0m.mf,1b_0mm.f,1b_0mmf." + "1b_1m.ii,1b_1m.mi,1b_1m.fi,1b_1b.bb,1b_1m.bb,\ + 1b_1m.ib,1b_1m.mb,1b_1m.fb,1b_1m.lx") +(exclusion_set "1b_0b.bb,1b_0bb.b,1b_0bbb.,1b_0m.bb,1b_0mb.b,1b_0mbb." + "1b_1m.ii,1b_1m.mi,1b_1m.fi,1b_1b.bb,1b_1m.bb,\ + 1b_1m.ib,1b_1m.mb,1b_1m.fb,1b_1m.lx") +(exclusion_set "1b_0m.ib,1b_0mi.b,1b_0mib." "1b_1b.bb") + +;; For exceptions of M, I, B, F insns: +(define_cpu_unit "1b_not_um1, 1b_not_ui1, 1b_not_uf1" "oneb") + +(final_absence_set "1b_not_um1" "1b_um1") +(final_absence_set "1b_not_ui1" "1b_ui1") +(final_absence_set "1b_not_uf1" "1b_uf1") + +;;; "MIB/MFB/MMB: Splits issue after any of these bundles unless the +;;; B-slot contains a nop.b or a brp instruction". +;;; "The B in an MIB/MFB/MMB bundle disperses to B0 if it is a brp or +;;; nop.b, otherwise it disperses to B2". +(final_absence_set + "1b_1m.ii, 1b_1m.mi, 1b_1m.fi, 1b_1b.bb, 1b_1m.bb,\ + 1b_1m.ib, 1b_1m.mb, 1b_1m.fb, 1b_1m.lx" + "1b_0mib. 1b_ub2, 1b_0mfb. 1b_ub2, 1b_0mmb. 
1b_ub2") + +;; This is necessary to start new processor cycle when we meet stop bit. +(define_cpu_unit "1b_stop" "oneb") +(final_absence_set + "1b_0m.ii,1b_0mi.i,1b_0mii.,1b_0m.mi,1b_0mm.i,1b_0mmi.,\ + 1b_0m.fi,1b_0mf.i,1b_0mfi.,\ + 1b_0m.mf,1b_0mm.f,1b_0mmf.,1b_0b.bb,1b_0bb.b,1b_0bbb.,\ + 1b_0m.bb,1b_0mb.b,1b_0mbb.,\ + 1b_0m.ib,1b_0mi.b,1b_0mib.,1b_0m.mb,1b_0mm.b,1b_0mmb.,\ + 1b_0m.fb,1b_0mf.b,1b_0mfb.,1b_0m.lx,1b_0mlx., \ + 1b_1m.ii,1b_1mi.i,1b_1mii.,1b_1m.mi,1b_1mm.i,1b_1mmi.,\ + 1b_1m.fi,1b_1mf.i,1b_1mfi.,\ + 1b_1b.bb,1b_1bb.b,1b_1bbb.,1b_1m.bb,1b_1mb.b,1b_1mbb.,\ + 1b_1m.ib,1b_1mi.b,1b_1mib.,\ + 1b_1m.mb,1b_1mm.b,1b_1mmb.,1b_1m.fb,1b_1mf.b,1b_1mfb.,1b_1m.lx,1b_1mlx." + "1b_stop") + +;; M and I instruction is dispersed to the lowest numbered M or I unit +;; not already in use. An I slot in the 3rd position of 2nd bundle is +;; always dispersed to I1 +(final_presence_set "1b_um1" "1b_um0") +(final_presence_set "1b_ui1" "1b_ui0, 1b_1mii., 1b_1mmi., 1b_1mfi.") + +;; Insns + +;; M and I instruction is dispersed to the lowest numbered M or I unit +;; not already in use. An I slot in the 3rd position of 2nd bundle is +;; always dispersed to I1 +(define_reservation "1b_M" + "1b_0m.ii+1_1+1b_um0|1b_0m.mi+1_1+1b_um0|1b_0mm.i+1_2+(1b_um0|1b_um1)\ + |1b_0m.fi+1_1+1b_um0|1b_0m.mf+1_1+1b_um0|1b_0mm.f+1_2+1b_um1\ + |1b_0m.bb+1_1+1b_um0|1b_0m.ib+1_1+1b_um0|1b_0m.mb+1_1+1b_um0\ + |1b_0mm.b+1_2+1b_um1|1b_0m.fb+1_1+1b_um0|1b_0m.lx+1_1+1b_um0\ + |1b_1mm.i+1_5+1b_um1|1b_1mm.b+1_5+1b_um1\ + |(1b_1m.ii+1_4|1b_1m.mi+1_4|1b_1m.fi+1_4|1b_1m.bb+1_4|1b_1m.ib+1_4\ + |1b_1m.mb+1_4|1b_1m.fb+1_4|1b_1m.lx+1_4)\ + +(1b_um0|1b_um1)") + +;; Exceptions for dispersal rules. +;; "An I slot in the 3rd position of 2nd bundle is always dispersed to I1". 
+(define_reservation "1b_I"
+ "1b_0mi.i+1_2+1b_ui0|1b_0mii.+1_3+(1b_ui0|1b_ui1)|1b_0mmi.+1_3+1b_ui0\
+ |1b_0mfi.+1_3+1b_ui0|1b_0mi.b+1_2+1b_ui0\
+ |(1b_1mi.i+1_5|1b_1mi.b+1_5)+(1b_ui0|1b_ui1)\
+ |1b_1mii.+1_6+1b_ui1|1b_1mmi.+1_6+1b_ui1|1b_1mfi.+1_6+1b_ui1")
+
+;; "An F slot in the 1st bundle disperses to F0".
+;; "An F slot in the 2nd bundle disperses to F1".
+(define_reservation "1b_F"
+ "1b_0mf.i+1_2+1b_uf0|1b_0mmf.+1_3+1b_uf0|1b_0mf.b+1_2+1b_uf0\
+ |1b_1mf.i+1_5+1b_uf1|1b_1mf.b+1_5+1b_uf1")
+
+;;; "Each B slot in MBB or BBB bundle disperses to the corresponding B
+;;; unit. That is, a B slot in 1st position is dispersed to B0. In the
+;;; 2nd position it is dispersed to B2".
+(define_reservation "1b_NB"
+ "1b_0b.bb+1_1+1b_unb0|1b_0bb.b+1_2+1b_unb1|1b_0bbb.+1_3+1b_unb2\
+ |1b_0mb.b+1_2+1b_unb1|1b_0mbb.+1_3+1b_unb2\
+ |1b_0mib.+1_3+1b_unb0|1b_0mmb.+1_3+1b_unb0|1b_0mfb.+1_3+1b_unb0\
+ |1b_1b.bb+1_4+1b_unb0|1b_1bb.b+1_5+1b_unb1\
+ |1b_1bbb.+1_6+1b_unb2|1b_1mb.b+1_5+1b_unb1|1b_1mbb.+1_6+1b_unb2\
+ |1b_1mib.+1_6+1b_unb0|1b_1mmb.+1_6+1b_unb0|1b_1mfb.+1_6+1b_unb0")
+
+(define_reservation "1b_B"
+ "1b_0b.bb+1_1+1b_ub0|1b_0bb.b+1_2+1b_ub1|1b_0bbb.+1_3+1b_ub2\
+ |1b_0mb.b+1_2+1b_ub1|1b_0mbb.+1_3+1b_ub2|1b_0mib.+1_3+1b_ub2\
+ |1b_0mfb.+1_3+1b_ub2|1b_1b.bb+1_4+1b_ub0|1b_1bb.b+1_5+1b_ub1\
+ |1b_1bbb.+1_6+1b_ub2|1b_1mb.b+1_5+1b_ub1\
+ |1b_1mib.+1_6+1b_ub2|1b_1mmb.+1_6+1b_ub2|1b_1mfb.+1_6+1b_ub2")
+
+(define_reservation "1b_L" "1b_0mlx.+1_3+1b_ui0+1b_uf0\
+ |1b_1mlx.+1_6+(1b_ui0|1b_ui1)+1b_uf1")
+
+;; We assume that there is no insn issued on the same cycle as unknown insn.
+(define_cpu_unit "1b_empty" "oneb") +(exclusion_set "1b_empty" + "1b_0m.ii,1b_0m.mi,1b_0m.fi,1b_0m.mf,1b_0b.bb,1b_0m.bb,\ + 1b_0m.ib,1b_0m.mb,1b_0m.fb,1b_0m.lx") + +(define_cpu_unit + "1b_0m_bs, 1b_0mi_bs, 1b_0mm_bs, 1b_0mf_bs, 1b_0b_bs, 1b_0bb_bs, 1b_0mb_bs" + "oneb") +(define_cpu_unit + "1b_1m_bs, 1b_1mi_bs, 1b_1mm_bs, 1b_1mf_bs, 1b_1b_bs, 1b_1bb_bs, 1b_1mb_bs" + "oneb") + +(define_cpu_unit "1b_m_cont, 1b_mi_cont, 1b_mm_cont, 1b_mf_cont, 1b_mb_cont,\ + 1b_b_cont, 1b_bb_cont" "oneb") + +;; For stop in the middle of the bundles. +(define_cpu_unit "1b_m_stop, 1b_m0_stop, 1b_m1_stop, 1b_0mmi_cont" "oneb") +(define_cpu_unit "1b_mi_stop, 1b_mi0_stop, 1b_mi1_stop, 1b_0mii_cont" "oneb") + +(final_presence_set "1b_0m_bs" + "1b_0m.ii, 1b_0m.mi, 1b_0m.mf, 1b_0m.fi, 1b_0m.bb,\ + 1b_0m.ib, 1b_0m.fb, 1b_0m.mb, 1b_0m.lx") +(final_presence_set "1b_1m_bs" + "1b_1m.ii, 1b_1m.mi, 1b_1m.fi, 1b_1m.bb, 1b_1m.ib, 1b_1m.fb, 1b_1m.mb,\ + 1b_1m.lx") +(final_presence_set "1b_0mi_bs" "1b_0mi.i, 1b_0mi.i") +(final_presence_set "1b_1mi_bs" "1b_1mi.i, 1b_1mi.i") +(final_presence_set "1b_0mm_bs" "1b_0mm.i, 1b_0mm.f, 1b_0mm.b") +(final_presence_set "1b_1mm_bs" "1b_1mm.i, 1b_1mm.b") +(final_presence_set "1b_0mf_bs" "1b_0mf.i, 1b_0mf.b") +(final_presence_set "1b_1mf_bs" "1b_1mf.i, 1b_1mf.b") +(final_presence_set "1b_0b_bs" "1b_0b.bb") +(final_presence_set "1b_1b_bs" "1b_1b.bb") +(final_presence_set "1b_0bb_bs" "1b_0bb.b") +(final_presence_set "1b_1bb_bs" "1b_1bb.b") +(final_presence_set "1b_0mb_bs" "1b_0mb.b") +(final_presence_set "1b_1mb_bs" "1b_1mb.b") + +(exclusion_set "1b_0m_bs" + "1b_0mi.i, 1b_0mm.i, 1b_0mm.f, 1b_0mf.i, 1b_0mb.b,\ + 1b_0mi.b, 1b_0mf.b, 1b_0mm.b, 1b_0mlx., 1b_m0_stop") +(exclusion_set "1b_1m_bs" + "1b_1mi.i, 1b_1mm.i, 1b_1mf.i, 1b_1mb.b, 1b_1mi.b, 1b_1mf.b, 1b_1mm.b,\ + 1b_1mlx., 1b_m1_stop") +(exclusion_set "1b_0mi_bs" "1b_0mii., 1b_0mib., 1b_mi0_stop") +(exclusion_set "1b_1mi_bs" "1b_1mii., 1b_1mib., 1b_mi1_stop") +(exclusion_set "1b_0mm_bs" "1b_0mmi., 1b_0mmf., 1b_0mmb.") 
+(exclusion_set "1b_1mm_bs" "1b_1mmi., 1b_1mmb.") +(exclusion_set "1b_0mf_bs" "1b_0mfi., 1b_0mfb.") +(exclusion_set "1b_1mf_bs" "1b_1mfi., 1b_1mfb.") +(exclusion_set "1b_0b_bs" "1b_0bb.b") +(exclusion_set "1b_1b_bs" "1b_1bb.b") +(exclusion_set "1b_0bb_bs" "1b_0bbb.") +(exclusion_set "1b_1bb_bs" "1b_1bbb.") +(exclusion_set "1b_0mb_bs" "1b_0mbb.") +(exclusion_set "1b_1mb_bs" "1b_1mbb.") + +(exclusion_set + "1b_0m_bs, 1b_0mi_bs, 1b_0mm_bs, 1b_0mf_bs, 1b_0b_bs, 1b_0bb_bs, 1b_0mb_bs, + 1b_1m_bs, 1b_1mi_bs, 1b_1mm_bs, 1b_1mf_bs, 1b_1b_bs, 1b_1bb_bs, 1b_1mb_bs" + "1b_stop") + +(final_presence_set + "1b_0mi.i, 1b_0mm.i, 1b_0mf.i, 1b_0mm.f, 1b_0mb.b,\ + 1b_0mi.b, 1b_0mm.b, 1b_0mf.b, 1b_0mlx." + "1b_m_cont") +(final_presence_set "1b_0mii., 1b_0mib." "1b_mi_cont") +(final_presence_set "1b_0mmi., 1b_0mmf., 1b_0mmb." "1b_mm_cont") +(final_presence_set "1b_0mfi., 1b_0mfb." "1b_mf_cont") +(final_presence_set "1b_0bb.b" "1b_b_cont") +(final_presence_set "1b_0bbb." "1b_bb_cont") +(final_presence_set "1b_0mbb." "1b_mb_cont") + +(exclusion_set + "1b_0m.ii, 1b_0m.mi, 1b_0m.fi, 1b_0m.mf, 1b_0b.bb, 1b_0m.bb,\ + 1b_0m.ib, 1b_0m.mb, 1b_0m.fb, 1b_0m.lx" + "1b_m_cont, 1b_mi_cont, 1b_mm_cont, 1b_mf_cont,\ + 1b_mb_cont, 1b_b_cont, 1b_bb_cont") + +(exclusion_set "1b_empty" + "1b_m_cont,1b_mi_cont,1b_mm_cont,1b_mf_cont,\ + 1b_mb_cont,1b_b_cont,1b_bb_cont") + +;; For m;mi bundle +(final_presence_set "1b_m0_stop" "1b_0m.mi") +(final_presence_set "1b_0mm.i" "1b_0mmi_cont") +(exclusion_set "1b_0mmi_cont" + "1b_0m.ii, 1b_0m.mi, 1b_0m.fi, 1b_0m.mf, 1b_0b.bb, 1b_0m.bb,\ + 1b_0m.ib, 1b_0m.mb, 1b_0m.fb, 1b_0m.lx") +(exclusion_set "1b_m0_stop" "1b_0mm.i") +(final_presence_set "1b_m1_stop" "1b_1m.mi") +(exclusion_set "1b_m1_stop" "1b_1mm.i") +(final_presence_set "1b_m_stop" "1b_m0_stop, 1b_m1_stop") + +;; For mi;i bundle +(final_presence_set "1b_mi0_stop" "1b_0mi.i") +(final_presence_set "1b_0mii." 
"1b_0mii_cont") +(exclusion_set "1b_0mii_cont" + "1b_0m.ii, 1b_0m.mi, 1b_0m.fi, 1b_0m.mf, 1b_0b.bb, 1b_0m.bb,\ + 1b_0m.ib, 1b_0m.mb, 1b_0m.fb, 1b_0m.lx") +(exclusion_set "1b_mi0_stop" "1b_0mii.") +(final_presence_set "1b_mi1_stop" "1b_1mi.i") +(exclusion_set "1b_mi1_stop" "1b_1mii.") +(final_presence_set "1b_mi_stop" "1b_mi0_stop, 1b_mi1_stop") + +(final_absence_set + "1b_0m.ii,1b_0mi.i,1b_0mii.,1b_0m.mi,1b_0mm.i,1b_0mmi.,\ + 1b_0m.fi,1b_0mf.i,1b_0mfi.,1b_0m.mf,1b_0mm.f,1b_0mmf.,\ + 1b_0b.bb,1b_0bb.b,1b_0bbb.,1b_0m.bb,1b_0mb.b,1b_0mbb.,\ + 1b_0m.ib,1b_0mi.b,1b_0mib.,1b_0m.mb,1b_0mm.b,1b_0mmb.,\ + 1b_0m.fb,1b_0mf.b,1b_0mfb.,1b_0m.lx,1b_0mlx., \ + 1b_1m.ii,1b_1mi.i,1b_1mii.,1b_1m.mi,1b_1mm.i,1b_1mmi.,\ + 1b_1m.fi,1b_1mf.i,1b_1mfi.,\ + 1b_1b.bb,1b_1bb.b,1b_1bbb.,1b_1m.bb,1b_1mb.b,1b_1mbb.,\ + 1b_1m.ib,1b_1mi.b,1b_1mib.,1b_1m.mb,1b_1mm.b,1b_1mmb.,\ + 1b_1m.fb,1b_1mf.b,1b_1mfb.,1b_1m.lx,1b_1mlx." + "1b_m0_stop,1b_m1_stop,1b_mi0_stop,1b_mi1_stop") + +(define_reservation "1b_A" "1b_M|1b_I") + +(define_insn_reservation "1b_stop_bit" 0 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "stop_bit")) + (ne (symbol_ref "bundling_p") (const_int 0))) + "1b_stop|1b_m0_stop|1b_m1_stop|1b_mi0_stop|1b_mi1_stop") +(define_insn_reservation "1b_br" 0 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "br")) + (ne (symbol_ref "bundling_p") (const_int 0))) "1b_B") +(define_insn_reservation "1b_scall" 0 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "scall")) + (ne (symbol_ref "bundling_p") (const_int 0))) "1b_B") +(define_insn_reservation "1b_fcmp" 2 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "fcmp")) + (ne (symbol_ref "bundling_p") (const_int 0))) + "1b_F+1b_not_uf1") +(define_insn_reservation "1b_fcvtfx" 7 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "fcvtfx")) + (ne (symbol_ref "bundling_p") (const_int 0))) "1b_F") +(define_insn_reservation "1b_fld" 9 + (and (and (eq_attr "cpu" "itanium") + 
(eq_attr "itanium_class" "fld")) + (ne (symbol_ref "bundling_p") (const_int 0))) "1b_M") +(define_insn_reservation "1b_fmac" 5 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "fmac")) + (ne (symbol_ref "bundling_p") (const_int 0))) "1b_F") +(define_insn_reservation "1b_fmisc" 5 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "fmisc")) + (ne (symbol_ref "bundling_p") (const_int 0))) + "1b_F+1b_not_uf1") +(define_insn_reservation "1b_frar_i" 13 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "frar_i")) + (ne (symbol_ref "bundling_p") (const_int 0))) + "1b_I+1b_not_ui1") +(define_insn_reservation "1b_frar_m" 6 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "frar_m")) + (ne (symbol_ref "bundling_p") (const_int 0))) + "1b_M+1b_not_um1") +(define_insn_reservation "1b_frbr" 2 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "frbr")) + (ne (symbol_ref "bundling_p") (const_int 0))) + "1b_I+1b_not_ui1") +(define_insn_reservation "1b_frfr" 2 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "frfr")) + (ne (symbol_ref "bundling_p") (const_int 0))) + "1b_M+1b_not_um1") +(define_insn_reservation "1b_frpr" 2 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "frpr")) + (ne (symbol_ref "bundling_p") (const_int 0))) + "1b_I+1b_not_ui1") +(define_insn_reservation "1b_ialu" 1 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "ialu")) + (ne (symbol_ref + "bundling_p && !ia64_produce_address_p (insn)") + (const_int 0))) + "1b_A") +(define_insn_reservation "1b_ialu_addr" 1 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "ialu")) + (eq (symbol_ref + "bundling_p && ia64_produce_address_p (insn)") + (const_int 1))) + "1b_M") +(define_insn_reservation "1b_icmp" 1 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "icmp")) + (ne (symbol_ref "bundling_p") (const_int 0))) "1b_A") +(define_insn_reservation "1b_ilog" 1 + (and (and 
(eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "ilog")) + (ne (symbol_ref "bundling_p") (const_int 0))) "1b_A") +(define_insn_reservation "1b_ishf" 1 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "ishf")) + (ne (symbol_ref "bundling_p") (const_int 0))) + "1b_I+1b_not_ui1") +(define_insn_reservation "1b_ld" 2 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "ld")) + (ne (symbol_ref "bundling_p") (const_int 0))) "1b_M") +(define_insn_reservation "1b_long_i" 1 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "long_i")) + (ne (symbol_ref "bundling_p") (const_int 0))) "1b_L") +(define_insn_reservation "1b_mmmul" 2 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "mmmul")) + (ne (symbol_ref "bundling_p") (const_int 0))) + "1b_I+1b_not_ui1") +(define_insn_reservation "1b_mmshf" 2 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "mmshf")) + (ne (symbol_ref "bundling_p") (const_int 0))) "1b_I") +(define_insn_reservation "1b_mmshfi" 2 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "mmshfi")) + (ne (symbol_ref "bundling_p") (const_int 0))) "1b_I") +(define_insn_reservation "1b_rse_m" 0 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "rse_m")) + (ne (symbol_ref "bundling_p") (const_int 0))) + "(1b_0m.ii|1b_0m.mi|1b_0m.fi|1b_0m.mf|1b_0b.bb|1b_0m.bb\ + |1b_0m.ib|1b_0m.mb|1b_0m.fb|1b_0m.lx)+1_1+1b_um0") +(define_insn_reservation "1b_sem" 0 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "sem")) + (ne (symbol_ref "bundling_p") (const_int 0))) + "1b_M+1b_not_um1") +(define_insn_reservation "1b_stf" 1 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "stf")) + (ne (symbol_ref "bundling_p") (const_int 0))) "1b_M") +(define_insn_reservation "1b_st" 1 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "st")) + (ne (symbol_ref "bundling_p") (const_int 0))) "1b_M") +(define_insn_reservation "1b_syst_m0" 0 + (and (and 
(eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "syst_m0")) + (ne (symbol_ref "bundling_p") (const_int 0))) + "1b_M+1b_not_um1") +(define_insn_reservation "1b_syst_m" 0 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "syst_m")) + (ne (symbol_ref "bundling_p") (const_int 0))) "1b_M") +(define_insn_reservation "1b_tbit" 1 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "tbit")) + (ne (symbol_ref "bundling_p") (const_int 0))) + "1b_I+1b_not_ui1") +(define_insn_reservation "1b_toar_i" 0 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "toar_i")) + (ne (symbol_ref "bundling_p") (const_int 0))) + "1b_I+1b_not_ui1") +(define_insn_reservation "1b_toar_m" 5 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "toar_m")) + (ne (symbol_ref "bundling_p") (const_int 0))) + "1b_M+1b_not_um1") +(define_insn_reservation "1b_tobr" 1 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "tobr")) + (ne (symbol_ref "bundling_p") (const_int 0))) + "1b_I+1b_not_ui1") +(define_insn_reservation "1b_tofr" 9 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "tofr")) + (ne (symbol_ref "bundling_p") (const_int 0))) "1b_M") +(define_insn_reservation "1b_topr" 1 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "topr")) + (ne (symbol_ref "bundling_p") (const_int 0))) + "1b_I+1b_not_ui1") +(define_insn_reservation "1b_xmpy" 7 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "xmpy")) + (ne (symbol_ref "bundling_p") (const_int 0))) "1b_F") +(define_insn_reservation "1b_xtd" 1 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "xtd")) + (ne (symbol_ref "bundling_p") (const_int 0))) "1b_I") +(define_insn_reservation "1b_chk_s" 0 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "chk_s")) + (ne (symbol_ref "bundling_p") (const_int 0))) "1b_A") +(define_insn_reservation "1b_lfetch" 0 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" 
"lfetch")) + (ne (symbol_ref "bundling_p") (const_int 0))) "1b_M") +(define_insn_reservation "1b_nop_m" 0 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "nop_m")) + (ne (symbol_ref "bundling_p") (const_int 0))) "1b_M") +(define_insn_reservation "1b_nop_b" 0 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "nop_b")) + (ne (symbol_ref "bundling_p") (const_int 0))) "1b_NB") +(define_insn_reservation "1b_nop_i" 0 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "nop_i")) + (ne (symbol_ref "bundling_p") (const_int 0))) "1b_I") +(define_insn_reservation "1b_nop_f" 0 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "nop_f")) + (ne (symbol_ref "bundling_p") (const_int 0))) "1b_F") +(define_insn_reservation "1b_nop_x" 0 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "nop_x")) + (ne (symbol_ref "bundling_p") (const_int 0))) "1b_L") +(define_insn_reservation "1b_unknown" 1 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "unknown")) + (ne (symbol_ref "bundling_p") (const_int 0))) + "1b_empty") +(define_insn_reservation "1b_nop" 1 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "nop")) + (ne (symbol_ref "bundling_p") (const_int 0))) + "1b_M|1b_NB|1b_I|1b_F") +(define_insn_reservation "1b_ignore" 0 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "ignore")) + (ne (symbol_ref "bundling_p") (const_int 0))) + "nothing") + +(define_insn_reservation "1b_pre_cycle" 0 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "pre_cycle")) + (ne (symbol_ref "bundling_p") (const_int 0))) + "(1b_0m_bs, 1b_m_cont) \ + | (1b_0mi_bs, 1b_mi_cont) \ + | (1b_0mm_bs, 1b_mm_cont) \ + | (1b_0mf_bs, 1b_mf_cont) \ + | (1b_0b_bs, 1b_b_cont) \ + | (1b_0bb_bs, 1b_bb_cont) \ + | (1b_0mb_bs, 1b_mb_cont) \ + | (1b_1m_bs, 1b_m_cont) \ + | (1b_1mi_bs, 1b_mi_cont) \ + | (1b_1mm_bs, 1b_mm_cont) \ + | (1b_1mf_bs, 1b_mf_cont) \ + | (1b_1b_bs, 1b_b_cont) \ + | (1b_1bb_bs, 
1b_bb_cont) \ + | (1b_1mb_bs, 1b_mb_cont) \ + | (1b_m_stop, 1b_0mmi_cont) \ + | (1b_mi_stop, 1b_0mii_cont)") + diff --git a/gcc/config/ia64/itanium2.md b/gcc/config/ia64/itanium2.md new file mode 100644 index 00000000000..73b533ea70c --- /dev/null +++ b/gcc/config/ia64/itanium2.md @@ -0,0 +1,1762 @@ +;; Itanium2 DFA descriptions for insn scheduling and bundling. +;; Copyright (C) 2002 Free Software Foundation, Inc. +;; Contributed by Vladimir Makarov <vmakarov@redhat.com>. +;; +;; This file is part of GNU CC. +;; +;; GNU CC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 2, or (at your option) +;; any later version. +;; +;; GNU CC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GNU CC; see the file COPYING. If not, write to +;; the Free Software Foundation, 59 Temple Place - Suite 330, +;; Boston, MA 02111-1307, USA. */ +;; + +/* This is description of pipeline hazards based on DFA. The + following constructions can be used for this: + + o define_cpu_unit string [string]) describes a cpu functional unit + (separated by comma). + + 1st operand: Names of cpu function units. + 2nd operand: Name of automaton (see comments for + DEFINE_AUTOMATON). + + All define_reservations and define_cpu_units should have unique + names which can not be "nothing". + + o (exclusion_set string string) means that each CPU function unit + in the first string can not be reserved simultaneously with each + unit whose name is in the second string and vise versa. CPU + units in the string are separated by commas. 
For example, it is + useful for description CPU with fully pipelined floating point + functional unit which can execute simultaneously only single + floating point insns or only double floating point insns. + + o (presence_set string string) means that each CPU function unit in + the first string can not be reserved unless at least one of + pattern of units whose names are in the second string is + reserved. This is an asymmetric relation. CPU units or unit + patterns in the strings are separated by commas. Pattern is one + unit name or unit names separated by white-spaces. + + For example, it is useful for description that slot1 is reserved + after slot0 reservation for a VLIW processor. We could describe + it by the following construction + + (presence_set "slot1" "slot0") + + Or slot1 is reserved only after slot0 and unit b0 reservation. + In this case we could write + + (presence_set "slot1" "slot0 b0") + + All CPU functional units in a set should belong to the same + automaton. + + o (final_presence_set string string) is analogous to + `presence_set'. The difference between them is when checking is + done. When an instruction is issued in given automaton state + reflecting all current and planned unit reservations, the + automaton state is changed. The first state is a source state, + the second one is a result state. Checking for `presence_set' is + done on the source state reservation, checking for + `final_presence_set' is done on the result reservation. This + construction is useful to describe a reservation which is + actually two subsequent reservations. For example, if we use + + (presence_set "slot1" "slot0") + + the following insn will be never issued (because slot1 requires + slot0 which is absent in the source state). + + (define_reservation "insn_and_nop" "slot0 + slot1") + + but it can be issued if we use analogous `final_presence_set'. 
+ + o (absence_set string string) means that each CPU function unit in + the first string can be reserved only if each pattern of units + whose names are in the second string is not reserved. This is an + asymmetric relation (actually exclusion set is analogous to this + one but it is symmetric). CPU units or unit patterns in the + string are separated by commas. Pattern is one unit name or unit + names separated by white-spaces. + + For example, it is useful for description that slot0 can not be + reserved after slot1 or slot2 reservation for a VLIW processor. + We could describe it by the following construction + + (absence_set "slot2" "slot0, slot1") + + Or slot2 can not be reserved if slot0 and unit b0 are reserved or + slot1 and unit b1 are reserved. In this case we could write + + (absence_set "slot2" "slot0 b0, slot1 b1") + + All CPU functional units in a set should belong to the same + automaton. + + o (final_absence_set string string) is analogous to `absence_set' but + checking is done on the result (state) reservation. See comments + for final_presence_set. + + o (define_bypass number out_insn_names in_insn_names) names bypass with + given latency (the first number) from insns given by the first + string (see define_insn_reservation) into insns given by the + second string. Insn names in the strings are separated by + commas. + + o (define_automaton string) describes names of an automaton + generated and used for pipeline hazards recognition. The names + are separated by comma. Actually it is possible to generate the + single automaton but unfortunately it can be very large. If we + use more than one automaton, the total size of the automata usually + is less than the single one. The automaton name is used in + define_cpu_unit. All automata should have unique names. + + o (automata_option string) describes option for generation of + automata. Currently there are the following options: + + o "no-minimization" which makes no minimization of automata.
+ This is only worth doing when we are debugging the description + and need to look more accurately at reservations of states. + + o "ndfa" which makes automata with nondeterministic reservation + by insns. + + o (define_reservation string string) names reservation (the first + string) of cpu functional units (the 2nd string). Sometimes unit + reservations for different insns contain common parts. In such + case, you describe common part and use its name (the 1st + parameter) in regular expression in define_insn_reservation. All + define_reservations, define results and define_cpu_units should + have unique names which can not be "nothing". + + o (define_insn_reservation name default_latency condition regexpr) + describes reservation of cpu functional units (the 3rd operand) + for instruction which is selected by the condition (the 2nd + parameter). The first parameter is used for output of debugging + information. The reservations are described by a regular + expression according to the following syntax: + + regexp = regexp "," oneof + | oneof + + oneof = oneof "|" allof + | allof + + allof = allof "+" repeat + | repeat + + repeat = element "*" number + | element + + element = cpu_function_name + | reservation_name + | result_name + | "nothing" + | "(" regexp ")" + + 1. "," is used for describing start of the next cycle in + reservation. + + 2. "|" is used for describing the reservation described by the + first regular expression *or* the reservation described by + the second regular expression *or* etc. + + 3. "+" is used for describing the reservation described by the + first regular expression *and* the reservation described by + the second regular expression *and* etc. + + 4. "*" is used for convenience and simply means a sequence in + which the regular expression is repeated NUMBER times with + cycle advancing (see ","). + + 5. cpu function unit name which means reservation. + + 6. reservation name -- see define_reservation. + + 7.
string "nothing" means no units reservation. + +*/ + +(define_automaton "two") + +;; All possible combinations of bundles/syllables +(define_cpu_unit "2_0m.ii, 2_0m.mi, 2_0m.fi, 2_0m.mf, 2_0b.bb, 2_0m.bb,\ + 2_0m.ib, 2_0m.mb, 2_0m.fb, 2_0m.lx" "two") +(define_cpu_unit "2_0mi.i, 2_0mm.i, 2_0mf.i, 2_0mm.f, 2_0bb.b, 2_0mb.b,\ + 2_0mi.b, 2_0mm.b, 2_0mf.b, 2_0mlx." "two") +(define_cpu_unit "2_0mii., 2_0mmi., 2_0mfi., 2_0mmf., 2_0bbb., 2_0mbb.,\ + 2_0mib., 2_0mmb., 2_0mfb." "two") + +(define_cpu_unit "2_1m.ii, 2_1m.mi, 2_1m.fi, 2_1m.mf, 2_1b.bb, 2_1m.bb,\ + 2_1m.ib, 2_1m.mb, 2_1m.fb, 2_1m.lx" "two") +(define_cpu_unit "2_1mi.i, 2_1mm.i, 2_1mf.i, 2_1mm.f, 2_1bb.b, 2_1mb.b,\ + 2_1mi.b, 2_1mm.b, 2_1mf.b, 2_1mlx." "two") +(define_cpu_unit "2_1mii., 2_1mmi., 2_1mfi., 2_1mmf., 2_1bbb., 2_1mbb.,\ + 2_1mib., 2_1mmb., 2_1mfb." "two") + +;; Slot 1 +(exclusion_set "2_0m.ii" "2_0m.mi, 2_0m.fi, 2_0m.mf, 2_0b.bb, 2_0m.bb,\ + 2_0m.ib, 2_0m.mb, 2_0m.fb, 2_0m.lx") +(exclusion_set "2_0m.mi" "2_0m.fi, 2_0m.mf, 2_0b.bb, 2_0m.bb, 2_0m.ib,\ + 2_0m.mb, 2_0m.fb, 2_0m.lx") +(exclusion_set "2_0m.fi" "2_0m.mf, 2_0b.bb, 2_0m.bb, 2_0m.ib, 2_0m.mb,\ + 2_0m.fb, 2_0m.lx") +(exclusion_set "2_0m.mf" "2_0b.bb, 2_0m.bb, 2_0m.ib, 2_0m.mb, 2_0m.fb,\ + 2_0m.lx") +(exclusion_set "2_0b.bb" "2_0m.bb, 2_0m.ib, 2_0m.mb, 2_0m.fb, 2_0m.lx") +(exclusion_set "2_0m.bb" "2_0m.ib, 2_0m.mb, 2_0m.fb, 2_0m.lx") +(exclusion_set "2_0m.ib" "2_0m.mb, 2_0m.fb, 2_0m.lx") +(exclusion_set "2_0m.mb" "2_0m.fb, 2_0m.lx") +(exclusion_set "2_0m.fb" "2_0m.lx") + +;; Slot 2 +(exclusion_set "2_0mi.i" "2_0mm.i, 2_0mf.i, 2_0mm.f, 2_0bb.b, 2_0mb.b,\ + 2_0mi.b, 2_0mm.b, 2_0mf.b, 2_0mlx.") +(exclusion_set "2_0mm.i" "2_0mf.i, 2_0mm.f, 2_0bb.b, 2_0mb.b,\ + 2_0mi.b, 2_0mm.b, 2_0mf.b, 2_0mlx.") +(exclusion_set "2_0mf.i" "2_0mm.f, 2_0bb.b, 2_0mb.b, 2_0mi.b, 2_0mm.b,\ + 2_0mf.b, 2_0mlx.") +(exclusion_set "2_0mm.f" "2_0bb.b, 2_0mb.b, 2_0mi.b, 2_0mm.b, 2_0mf.b,\ + 2_0mlx.") +(exclusion_set "2_0bb.b" "2_0mb.b, 2_0mi.b, 2_0mm.b, 2_0mf.b, 2_0mlx.") 
+(exclusion_set "2_0mb.b" "2_0mi.b, 2_0mm.b, 2_0mf.b, 2_0mlx.") +(exclusion_set "2_0mi.b" "2_0mm.b, 2_0mf.b, 2_0mlx.") +(exclusion_set "2_0mm.b" "2_0mf.b, 2_0mlx.") +(exclusion_set "2_0mf.b" "2_0mlx.") + +;; Slot 3 +(exclusion_set "2_0mii." "2_0mmi., 2_0mfi., 2_0mmf., 2_0bbb., 2_0mbb.,\ + 2_0mib., 2_0mmb., 2_0mfb., 2_0mlx.") +(exclusion_set "2_0mmi." "2_0mfi., 2_0mmf., 2_0bbb., 2_0mbb.,\ + 2_0mib., 2_0mmb., 2_0mfb., 2_0mlx.") +(exclusion_set "2_0mfi." "2_0mmf., 2_0bbb., 2_0mbb., 2_0mib., 2_0mmb.,\ + 2_0mfb., 2_0mlx.") +(exclusion_set "2_0mmf." "2_0bbb., 2_0mbb., 2_0mib., 2_0mmb., 2_0mfb.,\ + 2_0mlx.") +(exclusion_set "2_0bbb." "2_0mbb., 2_0mib., 2_0mmb., 2_0mfb., 2_0mlx.") +(exclusion_set "2_0mbb." "2_0mib., 2_0mmb., 2_0mfb., 2_0mlx.") +(exclusion_set "2_0mib." "2_0mmb., 2_0mfb., 2_0mlx.") +(exclusion_set "2_0mmb." "2_0mfb., 2_0mlx.") +(exclusion_set "2_0mfb." "2_0mlx.") + +;; Slot 4 +(exclusion_set "2_1m.ii" "2_1m.mi, 2_1m.fi, 2_1m.mf, 2_1b.bb, 2_1m.bb,\ + 2_1m.ib, 2_1m.mb, 2_1m.fb, 2_1m.lx") +(exclusion_set "2_1m.mi" "2_1m.fi, 2_1m.mf, 2_1b.bb, 2_1m.bb, 2_1m.ib,\ + 2_1m.mb, 2_1m.fb, 2_1m.lx") +(exclusion_set "2_1m.fi" "2_1m.mf, 2_1b.bb, 2_1m.bb, 2_1m.ib, 2_1m.mb,\ + 2_1m.fb, 2_1m.lx") +(exclusion_set "2_1m.mf" "2_1b.bb, 2_1m.bb, 2_1m.ib, 2_1m.mb, 2_1m.fb,\ + 2_1m.lx") +(exclusion_set "2_1b.bb" "2_1m.bb, 2_1m.ib, 2_1m.mb, 2_1m.fb, 2_1m.lx") +(exclusion_set "2_1m.bb" "2_1m.ib, 2_1m.mb, 2_1m.fb, 2_1m.lx") +(exclusion_set "2_1m.ib" "2_1m.mb, 2_1m.fb, 2_1m.lx") +(exclusion_set "2_1m.mb" "2_1m.fb, 2_1m.lx") +(exclusion_set "2_1m.fb" "2_1m.lx") + +;; Slot 5 +(exclusion_set "2_1mi.i" "2_1mm.i, 2_1mf.i, 2_1mm.f, 2_1bb.b, 2_1mb.b,\ + 2_1mi.b, 2_1mm.b, 2_1mf.b, 2_1mlx.") +(exclusion_set "2_1mm.i" "2_1mf.i, 2_1mm.f, 2_1bb.b, 2_1mb.b,\ + 2_1mi.b, 2_1mm.b, 2_1mf.b, 2_1mlx.") +(exclusion_set "2_1mf.i" "2_1mm.f, 2_1bb.b, 2_1mb.b, 2_1mi.b, 2_1mm.b,\ + 2_1mf.b, 2_1mlx.") +(exclusion_set "2_1mm.f" "2_1bb.b, 2_1mb.b, 2_1mi.b, 2_1mm.b, 2_1mf.b,\ + 2_1mlx.") +(exclusion_set "2_1bb.b" 
"2_1mb.b, 2_1mi.b, 2_1mm.b, 2_1mf.b, 2_1mlx.") +(exclusion_set "2_1mb.b" "2_1mi.b, 2_1mm.b, 2_1mf.b, 2_1mlx.") +(exclusion_set "2_1mi.b" "2_1mm.b, 2_1mf.b, 2_1mlx.") +(exclusion_set "2_1mm.b" "2_1mf.b, 2_1mlx.") +(exclusion_set "2_1mf.b" "2_1mlx.") + +;; Slot 6 +(exclusion_set "2_1mii." "2_1mmi., 2_1mfi., 2_1mmf., 2_1bbb., 2_1mbb.,\ + 2_1mib., 2_1mmb., 2_1mfb., 2_1mlx.") +(exclusion_set "2_1mmi." "2_1mfi., 2_1mmf., 2_1bbb., 2_1mbb.,\ + 2_1mib., 2_1mmb., 2_1mfb., 2_1mlx.") +(exclusion_set "2_1mfi." "2_1mmf., 2_1bbb., 2_1mbb., 2_1mib., 2_1mmb.,\ + 2_1mfb., 2_1mlx.") +(exclusion_set "2_1mmf." "2_1bbb., 2_1mbb., 2_1mib., 2_1mmb., 2_1mfb.,\ + 2_1mlx.") +(exclusion_set "2_1bbb." "2_1mbb., 2_1mib., 2_1mmb., 2_1mfb., 2_1mlx.") +(exclusion_set "2_1mbb." "2_1mib., 2_1mmb., 2_1mfb., 2_1mlx.") +(exclusion_set "2_1mib." "2_1mmb., 2_1mfb., 2_1mlx.") +(exclusion_set "2_1mmb." "2_1mfb., 2_1mlx.") +(exclusion_set "2_1mfb." "2_1mlx.") + +(final_presence_set "2_0mi.i" "2_0m.ii") +(final_presence_set "2_0mii." "2_0mi.i") +(final_presence_set "2_1mi.i" "2_1m.ii") +(final_presence_set "2_1mii." "2_1mi.i") + +(final_presence_set "2_0mm.i" "2_0m.mi") +(final_presence_set "2_0mmi." "2_0mm.i") +(final_presence_set "2_1mm.i" "2_1m.mi") +(final_presence_set "2_1mmi." "2_1mm.i") + +(final_presence_set "2_0mf.i" "2_0m.fi") +(final_presence_set "2_0mfi." "2_0mf.i") +(final_presence_set "2_1mf.i" "2_1m.fi") +(final_presence_set "2_1mfi." "2_1mf.i") + +(final_presence_set "2_0mm.f" "2_0m.mf") +(final_presence_set "2_0mmf." "2_0mm.f") +(final_presence_set "2_1mm.f" "2_1m.mf") +(final_presence_set "2_1mmf." "2_1mm.f") + +(final_presence_set "2_0bb.b" "2_0b.bb") +(final_presence_set "2_0bbb." "2_0bb.b") +(final_presence_set "2_1bb.b" "2_1b.bb") +(final_presence_set "2_1bbb." "2_1bb.b") + +(final_presence_set "2_0mb.b" "2_0m.bb") +(final_presence_set "2_0mbb." "2_0mb.b") +(final_presence_set "2_1mb.b" "2_1m.bb") +(final_presence_set "2_1mbb." 
"2_1mb.b") + +(final_presence_set "2_0mi.b" "2_0m.ib") +(final_presence_set "2_0mib." "2_0mi.b") +(final_presence_set "2_1mi.b" "2_1m.ib") +(final_presence_set "2_1mib." "2_1mi.b") + +(final_presence_set "2_0mm.b" "2_0m.mb") +(final_presence_set "2_0mmb." "2_0mm.b") +(final_presence_set "2_1mm.b" "2_1m.mb") +(final_presence_set "2_1mmb." "2_1mm.b") + +(final_presence_set "2_0mf.b" "2_0m.fb") +(final_presence_set "2_0mfb." "2_0mf.b") +(final_presence_set "2_1mf.b" "2_1m.fb") +(final_presence_set "2_1mfb." "2_1mf.b") + +(final_presence_set "2_0mlx." "2_0m.lx") +(final_presence_set "2_1mlx." "2_1m.lx") + +;; The following reflects the dual issue bundle types table. +;; We could place all possible combinations here because impossible +;; combinations would go away by the subsequent constraints. +(final_presence_set + "2_1m.lx" + "2_0mmi.,2_0mfi.,2_0mmf.,2_0mib.,2_0mmb.,2_0mfb.,2_0mlx.") +(final_presence_set "2_1b.bb" "2_0mii.,2_0mmi.,2_0mfi.,2_0mmf.,2_0mlx.") +(final_presence_set + "2_1m.ii,2_1m.mi,2_1m.fi,2_1m.mf,2_1m.bb,2_1m.ib,2_1m.mb,2_1m.fb" + "2_0mii.,2_0mmi.,2_0mfi.,2_0mmf.,2_0mib.,2_0mmb.,2_0mfb.,2_0mlx.") + +;; Ports/units (nb means nop.b insn issued into given port): +(define_cpu_unit + "2_um0, 2_um1, 2_um2, 2_um3, 2_ui0, 2_ui1, 2_uf0, 2_uf1,\ + 2_ub0, 2_ub1, 2_ub2, 2_unb0, 2_unb1, 2_unb2" "two") + +(exclusion_set "2_ub0" "2_unb0") +(exclusion_set "2_ub1" "2_unb1") +(exclusion_set "2_ub2" "2_unb2") + +;; The following rules are used to decrease number of alternatives. +;; They are consequences of Itanium2 microarchitecture. They also +;; describe the following rules mentioned in Itanium2 +;; microarchitecture: +;; o "BBB/MBB: Always splits issue after either of these bundles". +;; o "MIB BBB: Split issue after the first bundle in this pair". +(exclusion_set + "2_0b.bb,2_0bb.b,2_0bbb.,2_0m.bb,2_0mb.b,2_0mbb."
+ "2_1m.ii,2_1m.mi,2_1m.fi,2_1m.mf,2_1b.bb,2_1m.bb,\ + 2_1m.ib,2_1m.mb,2_1m.fb,2_1m.lx") +(exclusion_set "2_0m.ib,2_0mi.b,2_0mib." "2_1b.bb") + +;;; "MIB/MFB/MMB: Splits issue after any of these bundles unless the +;;; B-slot contains a nop.b or a brp instruction". +;;; "The B in an MIB/MFB/MMB bundle disperses to B0 if it is a brp or +;;; nop.b, otherwise it disperses to B2". +(final_absence_set + "2_1m.ii, 2_1m.mi, 2_1m.fi, 2_1m.mf, 2_1b.bb, 2_1m.bb,\ + 2_1m.ib, 2_1m.mb, 2_1m.fb, 2_1m.lx" + "2_0mib. 2_ub2, 2_0mfb. 2_ub2, 2_0mmb. 2_ub2") + +;; This is necessary to start new processor cycle when we meet stop bit. +(define_cpu_unit "2_stop" "two") +(final_absence_set + "2_0m.ii,2_0mi.i,2_0mii.,2_0m.mi,2_0mm.i,2_0mmi.,2_0m.fi,2_0mf.i,2_0mfi.,\ + 2_0m.mf,2_0mm.f,2_0mmf.,2_0b.bb,2_0bb.b,2_0bbb.,2_0m.bb,2_0mb.b,2_0mbb.,\ + 2_0m.ib,2_0mi.b,2_0mib.,2_0m.mb,2_0mm.b,2_0mmb.,2_0m.fb,2_0mf.b,2_0mfb.,\ + 2_0m.lx,2_0mlx., \ + 2_1m.ii,2_1mi.i,2_1mii.,2_1m.mi,2_1mm.i,2_1mmi.,2_1m.fi,2_1mf.i,2_1mfi.,\ + 2_1m.mf,2_1mm.f,2_1mmf.,2_1b.bb,2_1bb.b,2_1bbb.,2_1m.bb,2_1mb.b,2_1mbb.,\ + 2_1m.ib,2_1mi.b,2_1mib.,2_1m.mb,2_1mm.b,2_1mmb.,2_1m.fb,2_1mf.b,2_1mfb.,\ + 2_1m.lx,2_1mlx." + "2_stop") + +;; The issue logic can reorder M slot insns between different subtypes +;; but can not reorder insn within the same subtypes. The following +;; constraint is enough to describe this. +(final_presence_set "2_um1" "2_um0") +(final_presence_set "2_um3" "2_um2") + +;; The insn in the 1st I slot of the two bundle issue group will issue +;; to I0. The second I slot insn will issue to I1. 
+(final_presence_set "2_ui1" "2_ui0") + +;; For exceptions of I insns: +(define_cpu_unit "2_only_ui0" "two") +(final_absence_set "2_only_ui0" "2_ui1") + +;; Insns + +(define_reservation "2_M0" + "(2_0m.ii|2_0m.mi|2_0m.fi|2_0m.mf|2_0m.bb|2_0m.ib|2_0m.mb|2_0m.fb|2_0m.lx\ + |2_1m.ii|2_1m.mi|2_1m.fi|2_1m.mf|2_1m.bb|2_1m.ib|2_1m.mb|2_1m.fb|2_1m.lx\ + |2_0mm.i|2_0mm.f|2_0mm.b|2_1mm.i|2_1mm.f|2_1mm.b)\ + +(2_um0|2_um1|2_um2|2_um3)") + +(define_reservation "2_M1" + "(2_0mii.+(2_ui0|2_ui1)|2_0mmi.+2_ui0|2_0mfi.+2_ui0|2_0mmf.+2_uf0\ + |2_0mib.+2_unb0|2_0mfb.+2_unb0|2_0mmb.+2_unb0)\ + +(2_1m.ii|2_1m.mi|2_1m.fi|2_1m.mf|2_1m.bb|2_1m.ib|2_1m.mb|2_1m.fb|2_1m.lx)\ + +(2_um0|2_um1|2_um2|2_um3)") + +(define_reservation "2_M" "2_M0|2_M1") + +(define_reservation "2_M0_only_um0" + "(2_0m.ii|2_0m.mi|2_0m.fi|2_0m.mf|2_0m.bb|2_0m.ib|2_0m.mb|2_0m.fb|2_0m.lx\ + |2_1m.ii|2_1m.mi|2_1m.fi|2_1m.mf|2_1m.bb|2_1m.ib|2_1m.mb|2_1m.fb|2_1m.lx\ + |2_0mm.i|2_0mm.f|2_0mm.b|2_1mm.i|2_1mm.f|2_1mm.b)\ + +2_um0") + +(define_reservation "2_M1_only_um0" + "(2_0mii.+(2_ui0|2_ui1)|2_0mmi.+2_ui0|2_0mfi.+2_ui0|2_0mmf.+2_uf0\ + |2_0mib.+2_unb0|2_0mfb.+2_unb0|2_0mmb.+2_unb0)\ + +(2_1m.ii|2_1m.mi|2_1m.fi|2_1m.mf|2_1m.bb|2_1m.ib|2_1m.mb|2_1m.fb|2_1m.lx)\ + +2_um0") + +(define_reservation "2_M_only_um0" "2_M0_only_um0|2_M1_only_um0") + +(define_reservation "2_M0_only_um2" + "(2_0m.ii|2_0m.mi|2_0m.fi|2_0m.mf|2_0m.bb|2_0m.ib|2_0m.mb|2_0m.fb|2_0m.lx\ + |2_1m.ii|2_1m.mi|2_1m.fi|2_1m.mf|2_1m.bb|2_1m.ib|2_1m.mb|2_1m.fb|2_1m.lx\ + |2_0mm.i|2_0mm.f|2_0mm.b|2_1mm.i|2_1mm.f|2_1mm.b)\ + +2_um2") + +(define_reservation "2_M1_only_um2" + "(2_0mii.+(2_ui0|2_ui1)|2_0mmi.+2_ui0|2_0mfi.+2_ui0|2_0mmf.+2_uf0\ + |2_0mib.+2_unb0|2_0mfb.+2_unb0|2_0mmb.+2_unb0)\ + +(2_1m.ii|2_1m.mi|2_1m.fi|2_1m.mf|2_1m.bb|2_1m.ib|2_1m.mb|2_1m.fb|2_1m.lx)\ + +2_um2") + +(define_reservation "2_M_only_um2" "2_M0_only_um2|2_M1_only_um2") + +(define_reservation "2_M0_only_um23" + "(2_0m.ii|2_0m.mi|2_0m.fi|2_0m.mf|2_0m.bb|2_0m.ib|2_0m.mb|2_0m.fb|2_0m.lx\ + 
|2_1m.ii|2_1m.mi|2_1m.fi|2_1m.mf|2_1m.bb|2_1m.ib|2_1m.mb|2_1m.fb|2_1m.lx\ + |2_0mm.i|2_0mm.f|2_0mm.b|2_1mm.i|2_1mm.f|2_1mm.b)\ + +(2_um2|2_um3)") + +(define_reservation "2_M1_only_um23" + "(2_0mii.+(2_ui0|2_ui1)|2_0mmi.+2_ui0|2_0mfi.+2_ui0|2_0mmf.+2_uf0\ + |2_0mib.+2_unb0|2_0mfb.+2_unb0|2_0mmb.+2_unb0)\ + +(2_1m.ii|2_1m.mi|2_1m.fi|2_1m.mf|2_1m.bb|2_1m.ib|2_1m.mb|2_1m.fb|2_1m.lx)\ + +(2_um2|2_um3)") + +(define_reservation "2_M_only_um23" "2_M0_only_um23|2_M1_only_um23") + +(define_reservation "2_M0_only_um01" + "(2_0m.ii|2_0m.mi|2_0m.fi|2_0m.mf|2_0m.bb|2_0m.ib|2_0m.mb|2_0m.fb|2_0m.lx\ + |2_1m.ii|2_1m.mi|2_1m.fi|2_1m.mf|2_1m.bb|2_1m.ib|2_1m.mb|2_1m.fb|2_1m.lx\ + |2_0mm.i|2_0mm.f|2_0mm.b|2_1mm.i|2_1mm.f|2_1mm.b)\ + +(2_um0|2_um1)") + +(define_reservation "2_M1_only_um01" + "(2_0mii.+(2_ui0|2_ui1)|2_0mmi.+2_ui0|2_0mfi.+2_ui0|2_0mmf.+2_uf0\ + |2_0mib.+2_unb0|2_0mfb.+2_unb0|2_0mmb.+2_unb0)\ + +(2_1m.ii|2_1m.mi|2_1m.fi|2_1m.mf|2_1m.bb|2_1m.ib|2_1m.mb|2_1m.fb|2_1m.lx)\ + +(2_um0|2_um1)") + +(define_reservation "2_M_only_um01" "2_M0_only_um01|2_M1_only_um01") + +;; I instruction is dispersed to the lowest numbered I unit +;; not already in use. Remeber about possible spliting. +(define_reservation "2_I0" + "2_0mi.i+2_ui0|2_0mii.+(2_ui0|2_ui1)|2_0mmi.+2_ui0\ + |2_0mfi.+2_ui0|2_0mi.b+2_ui0|(2_1mi.i|2_1mi.b)+(2_ui0|2_ui1)\ + |(2_1mii.|2_1mmi.|2_1mfi.)+(2_ui0|2_ui1)") + +(define_reservation "2_I1" + "2_0m.ii+(2_um0|2_um1|2_um2|2_um3)+2_0mi.i+2_ui0\ + |2_0mm.i+(2_um0|2_um1|2_um2|2_um3)+2_0mmi.+2_ui0\ + |2_0mf.i+2_uf0+2_0mfi.+2_ui0\ + |2_0m.ib+(2_um0|2_um1|2_um2|2_um3)+2_0mi.b+2_ui0\ + |(2_1m.ii+2_1mi.i|2_1m.ib+2_1mi.b)+(2_um0|2_um1|2_um2|2_um3)+(2_ui0|2_ui1)\ + |2_1mm.i+(2_um0|2_um1|2_um2|2_um3)+2_1mmi.+(2_ui0|2_ui1)\ + |2_1mf.i+2_uf1+2_1mfi.+(2_ui0|2_ui1)") + +(define_reservation "2_I" "2_I0|2_I1") + +;; "An F slot in the 1st bundle disperses to F0". +;; "An F slot in the 2st bundle disperses to F1". 
+(define_reservation "2_F0" + "2_0mf.i+2_uf0|2_0mmf.+2_uf0|2_0mf.b+2_uf0\ + |2_1mf.i+2_uf1|2_1mmf.+2_uf1|2_1mf.b+2_uf1") + +(define_reservation "2_F1" + "(2_0m.fi+2_0mf.i|2_0mm.f+2_0mmf.|2_0m.fb+2_0mf.b)\ + +(2_um0|2_um1|2_um2|2_um3)+2_uf0\ + |(2_1m.fi+2_1mf.i|2_1mm.f+2_1mmf.|2_1m.fb+2_1mf.b)\ + +(2_um0|2_um1|2_um2|2_um3)+2_uf1") + +(define_reservation "2_F2" + "(2_0m.mf+2_0mm.f+2_0mmf.+2_uf0|2_1m.mf+2_1mm.f+2_1mmf.+2_uf1)\ + +(2_um0|2_um1|2_um2|2_um3)+(2_um0|2_um1|2_um2|2_um3)\ + |(2_0mii.+(2_ui0|2_ui1)|2_0mmi.+2_ui0|2_0mfi.+2_ui0\ + |2_0mmf.+(2_um0|2_um1|2_um2|2_um3)\ + |2_0mib.+2_unb0|2_0mmb.+2_unb0|2_0mfb.+2_unb0)\ + +(2_1m.fi+2_1mf.i|2_1m.fb+2_1mf.b)+(2_um0|2_um1|2_um2|2_um3)+2_uf1") + +(define_reservation "2_F" "2_F0|2_F1|2_F2") + +;;; "Each B slot in MBB or BBB bundle disperses to the corresponding B +;;; unit. That is, a B slot in 1st position is despersed to B0. In the +;;; 2nd position it is dispersed to B2". +(define_reservation "2_NB" + "2_0b.bb+2_unb0|2_0bb.b+2_unb1|2_0bbb.+2_unb2\ + |2_0mb.b+2_unb1|2_0mbb.+2_unb2|2_0mib.+2_unb0\ + |2_0mmb.+2_unb0|2_0mfb.+2_unb0\ + |2_1b.bb+2_unb0|2_1bb.b+2_unb1 + |2_1bbb.+2_unb2|2_1mb.b+2_unb1|2_1mbb.+2_unb2\ + |2_1mib.+2_unb0|2_1mmb.+2_unb0|2_1mfb.+2_unb0") + +(define_reservation "2_B0" + "2_0b.bb+2_ub0|2_0bb.b+2_ub1|2_0bbb.+2_ub2\ + |2_0mb.b+2_ub1|2_0mbb.+2_ub2|2_0mib.+2_ub2\ + |2_0mfb.+2_ub2|2_1b.bb+2_ub0|2_1bb.b+2_ub1\ + |2_1bbb.+2_ub2|2_1mb.b+2_ub1\ + |2_1mib.+2_ub2|2_1mmb.+2_ub2|2_1mfb.+2_ub2") + +(define_reservation "2_B1" + "2_0m.bb+(2_um0|2_um1|2_um2|2_um3)+2_0mb.b+2_ub1\ + |2_0mi.b+2_ui0+2_0mib.+2_ub2\ + |2_0mm.b+(2_um0|2_um1|2_um2|2_um3)+2_0mmb.+2_ub2\ + |2_0mf.b+2_uf0+2_0mfb.+2_ub2\ + |(2_0mii.+(2_ui0|2_ui1)|2_0mmi.+2_ui0|2_0mfi.+2_ui0|2_0mmf.+2_uf0)\ + +2_1b.bb+2_ub0\ + |2_1m.bb+(2_um0|2_um1|2_um2|2_um3)+2_1mb.b+2_ub1\ + |2_1mi.b+(2_ui0|2_ui1)+2_1mib.+2_ub2\ + |2_1mm.b+(2_um0|2_um1|2_um2|2_um3)+2_1mmb.+2_ub2\ + |2_1mf.b+2_uf1+2_1mfb.+2_ub2") + +(define_reservation "2_B" "2_B0|2_B1") + +;; MLX bunlde uses 
ports equivalent to MFI bundles. + +;; For the MLI template, the I slot insn is always assigned to port I0 +;; if it is in the first bundle or it is assigned to port I1 if it is in +;; the second bundle. +(define_reservation "2_L0" "2_0mlx.+2_ui0+2_uf0|2_1mlx.+2_ui1+2_uf1") + +(define_reservation "2_L1" + "2_0m.lx+(2_um0|2_um1|2_um2|2_um3)+2_0mlx.+2_ui0+2_uf0\ + |2_1m.lx+(2_um0|2_um1|2_um2|2_um3)+2_1mlx.+2_ui1+2_uf1") + +(define_reservation "2_L2" + "(2_0mii.+(2_ui0|2_ui1)|2_0mmi.+2_ui0|2_0mfi.+2_ui0|2_0mmf.+2_uf0\ + |2_0mib.+2_unb0|2_0mmb.+2_unb0|2_0mfb.+2_unb0) + +2_1m.lx+(2_um0|2_um1|2_um2|2_um3)+2_1mlx.+2_ui1+2_uf1") + +(define_reservation "2_L" "2_L0|2_L1|2_L2") + +;; Should we describe that A insn in I slot can be issued into M +;; ports? I think it is not necessary because of multipass +;; scheduling. For example, the multipass scheduling could use +;; MMI-MMI instead of MII-MII where the two last I slots contain A +;; insns (even if the case is complicated by use-def conflicts). +;; +;; In any case we could describe it as +;; (define_cpu_unit "2_ui1_0pres,2_ui1_1pres,2_ui1_2pres,2_ui1_3pres" "two") +;; (final_presence_set "2_ui1_0pres,2_ui1_1pres,2_ui1_2pres,2_ui1_3pres" +;; "2_ui1") +;; (define_reservation "b_A" +;; "b_M|b_I\ +;; |(2_1mi.i|2_1mii.|2_1mmi.|2_1mfi.|2_1mi.b)+(2_um0|2_um1|2_um2|2_um3)\ +;; +(2_ui1_0pres|2_ui1_1pres|2_ui1_2pres|2_ui1_3pres)") + +(define_reservation "2_A" "2_M|2_I") + +;; We assume that there is no insn issued on the same cycle as the +;; unknown insn. 
+(define_cpu_unit "2_empty" "two") +(exclusion_set "2_empty" + "2_0m.ii,2_0m.mi,2_0m.fi,2_0m.mf,2_0b.bb,2_0m.bb,2_0m.ib,2_0m.mb,2_0m.fb,\ + 2_0m.lx") + +(define_cpu_unit + "2_0m_bs, 2_0mi_bs, 2_0mm_bs, 2_0mf_bs, 2_0b_bs, 2_0bb_bs, 2_0mb_bs" + "two") +(define_cpu_unit + "2_1m_bs, 2_1mi_bs, 2_1mm_bs, 2_1mf_bs, 2_1b_bs, 2_1bb_bs, 2_1mb_bs" + "two") + +(define_cpu_unit "2_m_cont, 2_mi_cont, 2_mm_cont, 2_mf_cont, 2_mb_cont,\ + 2_b_cont, 2_bb_cont" "two") + +;; For stop in the middle of the bundles. +(define_cpu_unit "2_m_stop, 2_m0_stop, 2_m1_stop, 2_0mmi_cont" "two") +(define_cpu_unit "2_mi_stop, 2_mi0_stop, 2_mi1_stop, 2_0mii_cont" "two") + +(final_presence_set "2_0m_bs" + "2_0m.ii, 2_0m.mi, 2_0m.mf, 2_0m.fi, 2_0m.bb,\ + 2_0m.ib, 2_0m.fb, 2_0m.mb, 2_0m.lx") +(final_presence_set "2_1m_bs" + "2_1m.ii, 2_1m.mi, 2_1m.mf, 2_1m.fi, 2_1m.bb,\ + 2_1m.ib, 2_1m.fb, 2_1m.mb, 2_1m.lx") +(final_presence_set "2_0mi_bs" "2_0mi.i, 2_0mi.i") +(final_presence_set "2_1mi_bs" "2_1mi.i, 2_1mi.i") +(final_presence_set "2_0mm_bs" "2_0mm.i, 2_0mm.f, 2_0mm.b") +(final_presence_set "2_1mm_bs" "2_1mm.i, 2_1mm.f, 2_1mm.b") +(final_presence_set "2_0mf_bs" "2_0mf.i, 2_0mf.b") +(final_presence_set "2_1mf_bs" "2_1mf.i, 2_1mf.b") +(final_presence_set "2_0b_bs" "2_0b.bb") +(final_presence_set "2_1b_bs" "2_1b.bb") +(final_presence_set "2_0bb_bs" "2_0bb.b") +(final_presence_set "2_1bb_bs" "2_1bb.b") +(final_presence_set "2_0mb_bs" "2_0mb.b") +(final_presence_set "2_1mb_bs" "2_1mb.b") + +(exclusion_set "2_0m_bs" + "2_0mi.i, 2_0mm.i, 2_0mm.f, 2_0mf.i, 2_0mb.b,\ + 2_0mi.b, 2_0mf.b, 2_0mm.b, 2_0mlx., 2_m0_stop") +(exclusion_set "2_1m_bs" + "2_1mi.i, 2_1mm.i, 2_1mm.f, 2_1mf.i, 2_1mb.b,\ + 2_1mi.b, 2_1mf.b, 2_1mm.b, 2_1mlx., 2_m1_stop") +(exclusion_set "2_0mi_bs" "2_0mii., 2_0mib., 2_mi0_stop") +(exclusion_set "2_1mi_bs" "2_1mii., 2_1mib., 2_mi1_stop") +(exclusion_set "2_0mm_bs" "2_0mmi., 2_0mmf., 2_0mmb.") +(exclusion_set "2_1mm_bs" "2_1mmi., 2_1mmf., 2_1mmb.") +(exclusion_set "2_0mf_bs" "2_0mfi., 2_0mfb.") 
+(exclusion_set "2_1mf_bs" "2_1mfi., 2_1mfb.") +(exclusion_set "2_0b_bs" "2_0bb.b") +(exclusion_set "2_1b_bs" "2_1bb.b") +(exclusion_set "2_0bb_bs" "2_0bbb.") +(exclusion_set "2_1bb_bs" "2_1bbb.") +(exclusion_set "2_0mb_bs" "2_0mbb.") +(exclusion_set "2_1mb_bs" "2_1mbb.") + +(exclusion_set + "2_0m_bs, 2_0mi_bs, 2_0mm_bs, 2_0mf_bs, 2_0b_bs, 2_0bb_bs, 2_0mb_bs, + 2_1m_bs, 2_1mi_bs, 2_1mm_bs, 2_1mf_bs, 2_1b_bs, 2_1bb_bs, 2_1mb_bs" + "2_stop") + +(final_presence_set + "2_0mi.i, 2_0mm.i, 2_0mf.i, 2_0mm.f, 2_0mb.b,\ + 2_0mi.b, 2_0mm.b, 2_0mf.b, 2_0mlx." + "2_m_cont") +(final_presence_set "2_0mii., 2_0mib." "2_mi_cont") +(final_presence_set "2_0mmi., 2_0mmf., 2_0mmb." "2_mm_cont") +(final_presence_set "2_0mfi., 2_0mfb." "2_mf_cont") +(final_presence_set "2_0bb.b" "2_b_cont") +(final_presence_set "2_0bbb." "2_bb_cont") +(final_presence_set "2_0mbb." "2_mb_cont") + +(exclusion_set + "2_0m.ii, 2_0m.mi, 2_0m.fi, 2_0m.mf, 2_0b.bb, 2_0m.bb,\ + 2_0m.ib, 2_0m.mb, 2_0m.fb, 2_0m.lx" + "2_m_cont, 2_mi_cont, 2_mm_cont, 2_mf_cont,\ + 2_mb_cont, 2_b_cont, 2_bb_cont") + +(exclusion_set "2_empty" + "2_m_cont,2_mi_cont,2_mm_cont,2_mf_cont,\ + 2_mb_cont,2_b_cont,2_bb_cont") + +;; For m;mi bundle +(final_presence_set "2_m0_stop" "2_0m.mi") +(final_presence_set "2_0mm.i" "2_0mmi_cont") +(exclusion_set "2_0mmi_cont" + "2_0m.ii, 2_0m.mi, 2_0m.fi, 2_0m.mf, 2_0b.bb, 2_0m.bb,\ + 2_0m.ib, 2_0m.mb, 2_0m.fb, 2_0m.lx") +(exclusion_set "2_m0_stop" "2_0mm.i") +(final_presence_set "2_m1_stop" "2_1m.mi") +(exclusion_set "2_m1_stop" "2_1mm.i") +(final_presence_set "2_m_stop" "2_m0_stop, 2_m1_stop") + +;; For mi;i bundle +(final_presence_set "2_mi0_stop" "2_0mi.i") +(final_presence_set "2_0mii." 
"2_0mii_cont") +(exclusion_set "2_0mii_cont" + "2_0m.ii, 2_0m.mi, 2_0m.fi, 2_0m.mf, 2_0b.bb, 2_0m.bb,\ + 2_0m.ib, 2_0m.mb, 2_0m.fb, 2_0m.lx") +(exclusion_set "2_mi0_stop" "2_0mii.") +(final_presence_set "2_mi1_stop" "2_1mi.i") +(exclusion_set "2_mi1_stop" "2_1mii.") +(final_presence_set "2_mi_stop" "2_mi0_stop, 2_mi1_stop") + +(final_absence_set + "2_0m.ii,2_0mi.i,2_0mii.,2_0m.mi,2_0mm.i,2_0mmi.,2_0m.fi,2_0mf.i,2_0mfi.,\ + 2_0m.mf,2_0mm.f,2_0mmf.,2_0b.bb,2_0bb.b,2_0bbb.,2_0m.bb,2_0mb.b,2_0mbb.,\ + 2_0m.ib,2_0mi.b,2_0mib.,2_0m.mb,2_0mm.b,2_0mmb.,2_0m.fb,2_0mf.b,2_0mfb.,\ + 2_0m.lx,2_0mlx., \ + 2_1m.ii,2_1mi.i,2_1mii.,2_1m.mi,2_1mm.i,2_1mmi.,2_1m.fi,2_1mf.i,2_1mfi.,\ + 2_1m.mf,2_1mm.f,2_1mmf.,2_1b.bb,2_1bb.b,2_1bbb.,2_1m.bb,2_1mb.b,2_1mbb.,\ + 2_1m.ib,2_1mi.b,2_1mib.,2_1m.mb,2_1mm.b,2_1mmb.,2_1m.fb,2_1mf.b,2_1mfb.,\ + 2_1m.lx,2_1mlx." + "2_m0_stop,2_m1_stop,2_mi0_stop,2_mi1_stop") + +(define_insn_reservation "2_stop_bit" 0 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "stop_bit")) + (eq (symbol_ref "bundling_p") (const_int 0))) + "2_stop|2_m0_stop|2_m1_stop|2_mi0_stop|2_mi1_stop") + +(define_insn_reservation "2_br" 0 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "br")) + (eq (symbol_ref "bundling_p") (const_int 0))) "2_B") +(define_insn_reservation "2_scall" 0 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "scall")) + (eq (symbol_ref "bundling_p") (const_int 0))) "2_B") +(define_insn_reservation "2_fcmp" 2 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "fcmp")) + (eq (symbol_ref "bundling_p") (const_int 0))) "2_F") +(define_insn_reservation "2_fcvtfx" 4 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "fcvtfx")) + (eq (symbol_ref "bundling_p") (const_int 0))) "2_F") +(define_insn_reservation "2_fld" 6 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "fld")) + (eq (symbol_ref "bundling_p") (const_int 0))) "2_M") +(define_insn_reservation "2_fmac" 4 + (and 
(and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "fmac")) + (eq (symbol_ref "bundling_p") (const_int 0))) "2_F") +(define_insn_reservation "2_fmisc" 4 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "fmisc")) + (eq (symbol_ref "bundling_p") (const_int 0))) "2_F") + +;; There is only one insn `mov = ar.bsp' for frar_i: +;; Latency time ??? +(define_insn_reservation "2_frar_i" 13 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "frar_i")) + (eq (symbol_ref "bundling_p") (const_int 0))) + "2_I+2_only_ui0") +;; There is only two insns `mov = ar.unat' or `mov = ar.ccv' for frar_m: +;; Latency time ??? +(define_insn_reservation "2_frar_m" 6 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "frar_m")) + (eq (symbol_ref "bundling_p") (const_int 0))) + "2_M_only_um2") +(define_insn_reservation "2_frbr" 2 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "frbr")) + (eq (symbol_ref "bundling_p") (const_int 0))) + "2_I+2_only_ui0") +(define_insn_reservation "2_frfr" 5 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "frfr")) + (eq (symbol_ref "bundling_p") (const_int 0))) + "2_M_only_um2") +(define_insn_reservation "2_frpr" 2 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "frpr")) + (eq (symbol_ref "bundling_p") (const_int 0))) + "2_I+2_only_ui0") + +(define_insn_reservation "2_ialu" 1 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "ialu")) + (eq (symbol_ref "bundling_p") (const_int 0))) + "2_A") +(define_insn_reservation "2_icmp" 1 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "icmp")) + (eq (symbol_ref "bundling_p") (const_int 0))) "2_A") +(define_insn_reservation "2_ilog" 1 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "ilog")) + (eq (symbol_ref "bundling_p") (const_int 0))) "2_A") +;; Latency time ??? 
+(define_insn_reservation "2_ishf" 1 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "ishf")) + (eq (symbol_ref "bundling_p") (const_int 0))) + "2_I+2_only_ui0") +(define_insn_reservation "2_ld" 1 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "ld")) + (eq (symbol_ref "bundling_p") (const_int 0))) + "2_M_only_um01") +(define_insn_reservation "2_long_i" 1 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "long_i")) + (eq (symbol_ref "bundling_p") (const_int 0))) "2_L") + +(define_insn_reservation "2_mmmul" 2 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "mmmul")) + (eq (symbol_ref "bundling_p") (const_int 0))) + "2_I+2_only_ui0") +;; Latency time ??? +(define_insn_reservation "2_mmshf" 2 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "mmshf")) + (eq (symbol_ref "bundling_p") (const_int 0))) "2_I") +;; Latency time ??? +(define_insn_reservation "2_mmshfi" 1 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "mmshfi")) + (eq (symbol_ref "bundling_p") (const_int 0))) "2_I") + +;; Now we have only one insn (flushrs) of such class. We assume that flushrs +;; is the 1st syllable of the bundle after stop bit. 
+(define_insn_reservation "2_rse_m" 0 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "rse_m")) + (eq (symbol_ref "bundling_p") (const_int 0))) + "(2_0m.ii|2_0m.mi|2_0m.fi|2_0m.mf|2_0m.bb\ + |2_0m.ib|2_0m.mb|2_0m.fb|2_0m.lx)+2_um0") +(define_insn_reservation "2_sem" 0 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "sem")) + (eq (symbol_ref "bundling_p") (const_int 0))) + "2_M_only_um23") + +(define_insn_reservation "2_stf" 1 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "stf")) + (eq (symbol_ref "bundling_p") (const_int 0))) + "2_M_only_um23") +(define_insn_reservation "2_st" 1 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "st")) + (eq (symbol_ref "bundling_p") (const_int 0))) + "2_M_only_um23") +(define_insn_reservation "2_syst_m0" 0 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "syst_m0")) + (eq (symbol_ref "bundling_p") (const_int 0))) + "2_M_only_um2") +(define_insn_reservation "2_syst_m" 0 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "syst_m")) + (eq (symbol_ref "bundling_p") (const_int 0))) + "2_M_only_um0") +;; Reservation??? +(define_insn_reservation "2_tbit" 1 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "tbit")) + (eq (symbol_ref "bundling_p") (const_int 0))) + "2_I+2_only_ui0") + +;; There is only ony insn `mov ar.pfs =' for toar_i: +(define_insn_reservation "2_toar_i" 0 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "toar_i")) + (eq (symbol_ref "bundling_p") (const_int 0))) + "2_I+2_only_ui0") +;; There are only ony 2 insns `mov ar.ccv =' and `mov ar.unat =' for toar_m: +;; Latency time ??? +(define_insn_reservation "2_toar_m" 5 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "toar_m")) + (eq (symbol_ref "bundling_p") (const_int 0))) + "2_M_only_um2") +;; Latency time ??? 
+(define_insn_reservation "2_tobr" 1 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "tobr")) + (eq (symbol_ref "bundling_p") (const_int 0))) + "2_I+2_only_ui0") +(define_insn_reservation "2_tofr" 5 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "tofr")) + (eq (symbol_ref "bundling_p") (const_int 0))) + "2_M_only_um23") +;; Latency time ??? +(define_insn_reservation "2_topr" 1 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "topr")) + (eq (symbol_ref "bundling_p") (const_int 0))) + "2_I+2_only_ui0") + +(define_insn_reservation "2_xmpy" 4 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "xmpy")) + (eq (symbol_ref "bundling_p") (const_int 0))) "2_F") +;; Latency time ??? +(define_insn_reservation "2_xtd" 1 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "xtd")) + (eq (symbol_ref "bundling_p") (const_int 0))) "2_I") + +(define_insn_reservation "2_chk_s" 0 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "chk_s")) + (eq (symbol_ref "bundling_p") (const_int 0))) + "2_I|2_M_only_um23") +(define_insn_reservation "2_lfetch" 0 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "lfetch")) + (eq (symbol_ref "bundling_p") (const_int 0))) + "2_M_only_um01") + +(define_insn_reservation "2_nop_m" 0 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "nop_m")) + (eq (symbol_ref "bundling_p") (const_int 0))) "2_M0") +(define_insn_reservation "2_nop_b" 0 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "nop_b")) + (eq (symbol_ref "bundling_p") (const_int 0))) "2_NB") +(define_insn_reservation "2_nop_i" 0 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "nop_i")) + (eq (symbol_ref "bundling_p") (const_int 0))) "2_I0") +(define_insn_reservation "2_nop_f" 0 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "nop_f")) + (eq (symbol_ref "bundling_p") (const_int 0))) "2_F0") +(define_insn_reservation 
"2_nop_x" 0 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "nop_x")) + (eq (symbol_ref "bundling_p") (const_int 0))) "2_L0") + +(define_insn_reservation "2_unknown" 1 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "unknown")) + (eq (symbol_ref "bundling_p") (const_int 0))) "2_empty") + +(define_insn_reservation "2_nop" 0 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "nop")) + (eq (symbol_ref "bundling_p") (const_int 0))) + "2_M0|2_NB|2_I0|2_F0") + +(define_insn_reservation "2_ignore" 0 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "ignore")) + (eq (symbol_ref "bundling_p") (const_int 0))) "nothing") + +(define_cpu_unit "2_m_cont_only, 2_b_cont_only" "two") +(define_cpu_unit "2_mi_cont_only, 2_mm_cont_only, 2_mf_cont_only" "two") +(define_cpu_unit "2_mb_cont_only, 2_bb_cont_only" "two") + +(final_presence_set "2_m_cont_only" "2_m_cont") +(exclusion_set "2_m_cont_only" + "2_0mi.i, 2_0mm.i, 2_0mf.i, 2_0mm.f, 2_0mb.b,\ + 2_0mi.b, 2_0mm.b, 2_0mf.b, 2_0mlx.") + +(final_presence_set "2_b_cont_only" "2_b_cont") +(exclusion_set "2_b_cont_only" "2_0bb.b") + +(final_presence_set "2_mi_cont_only" "2_mi_cont") +(exclusion_set "2_mi_cont_only" "2_0mii., 2_0mib.") + +(final_presence_set "2_mm_cont_only" "2_mm_cont") +(exclusion_set "2_mm_cont_only" "2_0mmi., 2_0mmf., 2_0mmb.") + +(final_presence_set "2_mf_cont_only" "2_mf_cont") +(exclusion_set "2_mf_cont_only" "2_0mfi., 2_0mfb.") + +(final_presence_set "2_mb_cont_only" "2_mb_cont") +(exclusion_set "2_mb_cont_only" "2_0mbb.") + +(final_presence_set "2_bb_cont_only" "2_bb_cont") +(exclusion_set "2_bb_cont_only" "2_0bbb.") + +(define_insn_reservation "2_pre_cycle" 0 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "pre_cycle")) + (eq (symbol_ref "bundling_p") (const_int 0))) + "nothing") + +;;(define_insn_reservation "2_pre_cycle" 0 +;; (and (and (eq_attr "cpu" "itanium2") +;; (eq_attr "itanium_class" "pre_cycle")) +;; (eq (symbol_ref 
"bundling_p") (const_int 0))) +;; "(2_0m_bs, 2_m_cont) \ +;; | (2_0mi_bs, (2_mi_cont|nothing)) \ +;; | (2_0mm_bs, 2_mm_cont) \ +;; | (2_0mf_bs, (2_mf_cont|nothing)) \ +;; | (2_0b_bs, (2_b_cont|nothing)) \ +;; | (2_0bb_bs, (2_bb_cont|nothing)) \ +;; | (2_0mb_bs, (2_mb_cont|nothing)) \ +;; | (2_1m_bs, 2_m_cont) \ +;; | (2_1mi_bs, (2_mi_cont|nothing)) \ +;; | (2_1mm_bs, 2_mm_cont) \ +;; | (2_1mf_bs, (2_mf_cont|nothing)) \ +;; | (2_1b_bs, (2_b_cont|nothing)) \ +;; | (2_1bb_bs, (2_bb_cont|nothing)) \ +;; | (2_1mb_bs, (2_mb_cont|nothing)) \ +;; | (2_m_cont_only, (2_m_cont|nothing)) \ +;; | (2_b_cont_only, (2_b_cont|nothing)) \ +;; | (2_mi_cont_only, (2_mi_cont|nothing)) \ +;; | (2_mm_cont_only, (2_mm_cont|nothing)) \ +;; | (2_mf_cont_only, (2_mf_cont|nothing)) \ +;; | (2_mb_cont_only, (2_mb_cont|nothing)) \ +;; | (2_bb_cont_only, (2_bb_cont|nothing)) \ +;; | (2_m_stop, (2_0mmi_cont|nothing)) \ +;; | (2_mi_stop, (2_0mii_cont|nothing))") + +;; Bypasses: + +(define_bypass 1 "2_fcmp" "2_br,2_scall") +(define_bypass 0 "2_icmp" "2_br,2_scall") +(define_bypass 0 "2_tbit" "2_br,2_scall") +(define_bypass 2 "2_ld" "2_ld" "ia64_ld_address_bypass_p") +(define_bypass 2 "2_ld" "2_st" "ia64_st_address_bypass_p") +(define_bypass 2 "2_ld" "2_mmmul,2_mmshf") +(define_bypass 3 "2_ilog" "2_mmmul,2_mmshf") +(define_bypass 3 "2_ialu" "2_mmmul,2_mmshf") +(define_bypass 3 "2_mmmul,2_mmshf" "2_ialu,2_ilog,2_ishf,2_st,2_ld") +(define_bypass 6 "2_tofr" "2_frfr,2_stf") +(define_bypass 7 "2_fmac" "2_frfr,2_stf") + +;; We don't use here fcmp because scall may be predicated. 
+(define_bypass 0 "2_fcvtfx,2_fld,2_fmac,2_fmisc,2_frar_i,2_frar_m,\ + 2_frbr,2_frfr,2_frpr,2_ialu,2_ilog,2_ishf,2_ld,2_long_i,\ + 2_mmmul,2_mmshf,2_mmshfi,2_toar_m,2_tofr,2_xmpy,2_xtd" + "2_scall") + +(define_bypass 0 "2_unknown,2_ignore,2_stop_bit,2_br,2_fcmp,2_fcvtfx,2_fld,\ + 2_fmac,2_fmisc,2_frar_i,2_frar_m,2_frbr,2_frfr,2_frpr,\ + 2_ialu,2_icmp,2_ilog,2_ishf,2_ld,2_chk_s,\ + 2_long_i,2_mmmul,2_mmshf,2_mmshfi,2_nop,2_nop_b,2_nop_f,\ + 2_nop_i,2_nop_m,2_nop_x,2_rse_m,2_scall,2_sem,2_stf,2_st,\ + 2_syst_m0,2_syst_m,2_tbit,2_toar_i,2_toar_m,2_tobr,2_tofr,\ + 2_topr,2_xmpy,2_xtd,2_lfetch" "2_ignore") + + + +;; Bundling + +(define_automaton "twob") + +;; Pseudo units for quicker searching for position in two packet window. */ +(define_query_cpu_unit "2_1,2_2,2_3,2_4,2_5,2_6" "twob") + +;; All possible combinations of bundles/syllables +(define_cpu_unit + "2b_0m.ii, 2b_0m.mi, 2b_0m.fi, 2b_0m.mf, 2b_0b.bb, 2b_0m.bb,\ + 2b_0m.ib, 2b_0m.mb, 2b_0m.fb, 2b_0m.lx" "twob") +(define_cpu_unit + "2b_0mi.i, 2b_0mm.i, 2b_0mf.i, 2b_0mm.f, 2b_0bb.b, 2b_0mb.b,\ + 2b_0mi.b, 2b_0mm.b, 2b_0mf.b" "twob") +(define_query_cpu_unit + "2b_0mii., 2b_0mmi., 2b_0mfi., 2b_0mmf., 2b_0bbb., 2b_0mbb.,\ + 2b_0mib., 2b_0mmb., 2b_0mfb., 2b_0mlx." "twob") + +(define_cpu_unit + "2b_1m.ii, 2b_1m.mi, 2b_1m.fi, 2b_1m.mf, 2b_1b.bb, 2b_1m.bb,\ + 2b_1m.ib, 2b_1m.mb, 2b_1m.fb, 2b_1m.lx" "twob") +(define_cpu_unit + "2b_1mi.i, 2b_1mm.i, 2b_1mf.i, 2b_1mm.f, 2b_1bb.b, 2b_1mb.b,\ + 2b_1mi.b, 2b_1mm.b, 2b_1mf.b" "twob") +(define_query_cpu_unit + "2b_1mii., 2b_1mmi., 2b_1mfi., 2b_1mmf., 2b_1bbb., 2b_1mbb.,\ + 2b_1mib., 2b_1mmb., 2b_1mfb., 2b_1mlx." 
"twob") + +;; Slot 1 +(exclusion_set "2b_0m.ii" + "2b_0m.mi, 2b_0m.fi, 2b_0m.mf, 2b_0b.bb, 2b_0m.bb,\ + 2b_0m.ib, 2b_0m.mb, 2b_0m.fb, 2b_0m.lx") +(exclusion_set "2b_0m.mi" + "2b_0m.fi, 2b_0m.mf, 2b_0b.bb, 2b_0m.bb, 2b_0m.ib,\ + 2b_0m.mb, 2b_0m.fb, 2b_0m.lx") +(exclusion_set "2b_0m.fi" + "2b_0m.mf, 2b_0b.bb, 2b_0m.bb, 2b_0m.ib, 2b_0m.mb, 2b_0m.fb, 2b_0m.lx") +(exclusion_set "2b_0m.mf" + "2b_0b.bb, 2b_0m.bb, 2b_0m.ib, 2b_0m.mb, 2b_0m.fb, 2b_0m.lx") +(exclusion_set "2b_0b.bb" "2b_0m.bb, 2b_0m.ib, 2b_0m.mb, 2b_0m.fb, 2b_0m.lx") +(exclusion_set "2b_0m.bb" "2b_0m.ib, 2b_0m.mb, 2b_0m.fb, 2b_0m.lx") +(exclusion_set "2b_0m.ib" "2b_0m.mb, 2b_0m.fb, 2b_0m.lx") +(exclusion_set "2b_0m.mb" "2b_0m.fb, 2b_0m.lx") +(exclusion_set "2b_0m.fb" "2b_0m.lx") + +;; Slot 2 +(exclusion_set "2b_0mi.i" + "2b_0mm.i, 2b_0mf.i, 2b_0mm.f, 2b_0bb.b, 2b_0mb.b,\ + 2b_0mi.b, 2b_0mm.b, 2b_0mf.b, 2b_0mlx.") +(exclusion_set "2b_0mm.i" + "2b_0mf.i, 2b_0mm.f, 2b_0bb.b, 2b_0mb.b,\ + 2b_0mi.b, 2b_0mm.b, 2b_0mf.b, 2b_0mlx.") +(exclusion_set "2b_0mf.i" + "2b_0mm.f, 2b_0bb.b, 2b_0mb.b, 2b_0mi.b, 2b_0mm.b, 2b_0mf.b, 2b_0mlx.") +(exclusion_set "2b_0mm.f" + "2b_0bb.b, 2b_0mb.b, 2b_0mi.b, 2b_0mm.b, 2b_0mf.b, 2b_0mlx.") +(exclusion_set "2b_0bb.b" "2b_0mb.b, 2b_0mi.b, 2b_0mm.b, 2b_0mf.b, 2b_0mlx.") +(exclusion_set "2b_0mb.b" "2b_0mi.b, 2b_0mm.b, 2b_0mf.b, 2b_0mlx.") +(exclusion_set "2b_0mi.b" "2b_0mm.b, 2b_0mf.b, 2b_0mlx.") +(exclusion_set "2b_0mm.b" "2b_0mf.b, 2b_0mlx.") +(exclusion_set "2b_0mf.b" "2b_0mlx.") + +;; Slot 3 +(exclusion_set "2b_0mii." + "2b_0mmi., 2b_0mfi., 2b_0mmf., 2b_0bbb., 2b_0mbb.,\ + 2b_0mib., 2b_0mmb., 2b_0mfb., 2b_0mlx.") +(exclusion_set "2b_0mmi." + "2b_0mfi., 2b_0mmf., 2b_0bbb., 2b_0mbb.,\ + 2b_0mib., 2b_0mmb., 2b_0mfb., 2b_0mlx.") +(exclusion_set "2b_0mfi." + "2b_0mmf., 2b_0bbb., 2b_0mbb., 2b_0mib., 2b_0mmb., 2b_0mfb., 2b_0mlx.") +(exclusion_set "2b_0mmf." + "2b_0bbb., 2b_0mbb., 2b_0mib., 2b_0mmb., 2b_0mfb., 2b_0mlx.") +(exclusion_set "2b_0bbb." 
"2b_0mbb., 2b_0mib., 2b_0mmb., 2b_0mfb., 2b_0mlx.") +(exclusion_set "2b_0mbb." "2b_0mib., 2b_0mmb., 2b_0mfb., 2b_0mlx.") +(exclusion_set "2b_0mib." "2b_0mmb., 2b_0mfb., 2b_0mlx.") +(exclusion_set "2b_0mmb." "2b_0mfb., 2b_0mlx.") +(exclusion_set "2b_0mfb." "2b_0mlx.") + +;; Slot 4 +(exclusion_set "2b_1m.ii" + "2b_1m.mi, 2b_1m.fi, 2b_1m.mf, 2b_1b.bb, 2b_1m.bb,\ + 2b_1m.ib, 2b_1m.mb, 2b_1m.fb, 2b_1m.lx") +(exclusion_set "2b_1m.mi" + "2b_1m.fi, 2b_1m.mf, 2b_1b.bb, 2b_1m.bb, 2b_1m.ib,\ + 2b_1m.mb, 2b_1m.fb, 2b_1m.lx") +(exclusion_set "2b_1m.fi" + "2b_1m.mf, 2b_1b.bb, 2b_1m.bb, 2b_1m.ib, 2b_1m.mb, 2b_1m.fb, 2b_1m.lx") +(exclusion_set "2b_1m.mf" + "2b_1b.bb, 2b_1m.bb, 2b_1m.ib, 2b_1m.mb, 2b_1m.fb, 2b_1m.lx") +(exclusion_set "2b_1b.bb" "2b_1m.bb, 2b_1m.ib, 2b_1m.mb, 2b_1m.fb, 2b_1m.lx") +(exclusion_set "2b_1m.bb" "2b_1m.ib, 2b_1m.mb, 2b_1m.fb, 2b_1m.lx") +(exclusion_set "2b_1m.ib" "2b_1m.mb, 2b_1m.fb, 2b_1m.lx") +(exclusion_set "2b_1m.mb" "2b_1m.fb, 2b_1m.lx") +(exclusion_set "2b_1m.fb" "2b_1m.lx") + +;; Slot 5 +(exclusion_set "2b_1mi.i" + "2b_1mm.i, 2b_1mf.i, 2b_1mm.f, 2b_1bb.b, 2b_1mb.b,\ + 2b_1mi.b, 2b_1mm.b, 2b_1mf.b, 2b_1mlx.") +(exclusion_set "2b_1mm.i" + "2b_1mf.i, 2b_1mm.f, 2b_1bb.b, 2b_1mb.b,\ + 2b_1mi.b, 2b_1mm.b, 2b_1mf.b, 2b_1mlx.") +(exclusion_set "2b_1mf.i" + "2b_1mm.f, 2b_1bb.b, 2b_1mb.b, 2b_1mi.b, 2b_1mm.b, 2b_1mf.b, 2b_1mlx.") +(exclusion_set "2b_1mm.f" + "2b_1bb.b, 2b_1mb.b, 2b_1mi.b, 2b_1mm.b, 2b_1mf.b, 2b_1mlx.") +(exclusion_set "2b_1bb.b" "2b_1mb.b, 2b_1mi.b, 2b_1mm.b, 2b_1mf.b, 2b_1mlx.") +(exclusion_set "2b_1mb.b" "2b_1mi.b, 2b_1mm.b, 2b_1mf.b, 2b_1mlx.") +(exclusion_set "2b_1mi.b" "2b_1mm.b, 2b_1mf.b, 2b_1mlx.") +(exclusion_set "2b_1mm.b" "2b_1mf.b, 2b_1mlx.") +(exclusion_set "2b_1mf.b" "2b_1mlx.") + +;; Slot 6 +(exclusion_set "2b_1mii." + "2b_1mmi., 2b_1mfi., 2b_1mmf., 2b_1bbb., 2b_1mbb.,\ + 2b_1mib., 2b_1mmb., 2b_1mfb., 2b_1mlx.") +(exclusion_set "2b_1mmi." 
+ "2b_1mfi., 2b_1mmf., 2b_1bbb., 2b_1mbb.,\ + 2b_1mib., 2b_1mmb., 2b_1mfb., 2b_1mlx.") +(exclusion_set "2b_1mfi." + "2b_1mmf., 2b_1bbb., 2b_1mbb., 2b_1mib., 2b_1mmb., 2b_1mfb., 2b_1mlx.") +(exclusion_set "2b_1mmf." + "2b_1bbb., 2b_1mbb., 2b_1mib., 2b_1mmb., 2b_1mfb., 2b_1mlx.") +(exclusion_set "2b_1bbb." "2b_1mbb., 2b_1mib., 2b_1mmb., 2b_1mfb., 2b_1mlx.") +(exclusion_set "2b_1mbb." "2b_1mib., 2b_1mmb., 2b_1mfb., 2b_1mlx.") +(exclusion_set "2b_1mib." "2b_1mmb., 2b_1mfb., 2b_1mlx.") +(exclusion_set "2b_1mmb." "2b_1mfb., 2b_1mlx.") +(exclusion_set "2b_1mfb." "2b_1mlx.") + +(final_presence_set "2b_0mi.i" "2b_0m.ii") +(final_presence_set "2b_0mii." "2b_0mi.i") +(final_presence_set "2b_1mi.i" "2b_1m.ii") +(final_presence_set "2b_1mii." "2b_1mi.i") + +(final_presence_set "2b_0mm.i" "2b_0m.mi") +(final_presence_set "2b_0mmi." "2b_0mm.i") +(final_presence_set "2b_1mm.i" "2b_1m.mi") +(final_presence_set "2b_1mmi." "2b_1mm.i") + +(final_presence_set "2b_0mf.i" "2b_0m.fi") +(final_presence_set "2b_0mfi." "2b_0mf.i") +(final_presence_set "2b_1mf.i" "2b_1m.fi") +(final_presence_set "2b_1mfi." "2b_1mf.i") + +(final_presence_set "2b_0mm.f" "2b_0m.mf") +(final_presence_set "2b_0mmf." "2b_0mm.f") +(final_presence_set "2b_1mm.f" "2b_1m.mf") +(final_presence_set "2b_1mmf." "2b_1mm.f") + +(final_presence_set "2b_0bb.b" "2b_0b.bb") +(final_presence_set "2b_0bbb." "2b_0bb.b") +(final_presence_set "2b_1bb.b" "2b_1b.bb") +(final_presence_set "2b_1bbb." "2b_1bb.b") + +(final_presence_set "2b_0mb.b" "2b_0m.bb") +(final_presence_set "2b_0mbb." "2b_0mb.b") +(final_presence_set "2b_1mb.b" "2b_1m.bb") +(final_presence_set "2b_1mbb." "2b_1mb.b") + +(final_presence_set "2b_0mi.b" "2b_0m.ib") +(final_presence_set "2b_0mib." "2b_0mi.b") +(final_presence_set "2b_1mi.b" "2b_1m.ib") +(final_presence_set "2b_1mib." "2b_1mi.b") + +(final_presence_set "2b_0mm.b" "2b_0m.mb") +(final_presence_set "2b_0mmb." "2b_0mm.b") +(final_presence_set "2b_1mm.b" "2b_1m.mb") +(final_presence_set "2b_1mmb." 
"2b_1mm.b") + +(final_presence_set "2b_0mf.b" "2b_0m.fb") +(final_presence_set "2b_0mfb." "2b_0mf.b") +(final_presence_set "2b_1mf.b" "2b_1m.fb") +(final_presence_set "2b_1mfb." "2b_1mf.b") + +(final_presence_set "2b_0mlx." "2b_0m.lx") +(final_presence_set "2b_1mlx." "2b_1m.lx") + +;; See the corresponding comment in non-bundling section above. +(final_presence_set + "2b_1m.lx" + "2b_0mmi.,2b_0mfi.,2b_0mmf.,2b_0mib.,2b_0mmb.,2b_0mfb.,2b_0mlx.") +(final_presence_set "2b_1b.bb" "2b_0mii.,2b_0mmi.,2b_0mfi.,2b_0mmf.,2b_0mlx.") +(final_presence_set + "2b_1m.ii,2b_1m.mi,2b_1m.fi,2b_1m.mf,2b_1m.bb,2b_1m.ib,2b_1m.mb,2b_1m.fb" + "2b_0mii.,2b_0mmi.,2b_0mfi.,2b_0mmf.,2b_0mib.,2b_0mmb.,2b_0mfb.,2b_0mlx.") + +;; Ports/units (nb means nop.b insn issued into given port): +(define_cpu_unit + "2b_um0, 2b_um1, 2b_um2, 2b_um3, 2b_ui0, 2b_ui1, 2b_uf0, 2b_uf1,\ + 2b_ub0, 2b_ub1, 2b_ub2, 2b_unb0, 2b_unb1, 2b_unb2" "twob") + +(exclusion_set "2b_ub0" "2b_unb0") +(exclusion_set "2b_ub1" "2b_unb1") +(exclusion_set "2b_ub2" "2b_unb2") + +;; The following rules are used to decrease number of alternatives. +;; They are consequences of Itanium2 microarchitecture. They also +;; describe the following rules mentioned in Itanium2 +;; microarchitecture: rules mentioned in Itanium2 microarchitecture: +;; o "BBB/MBB: Always splits issue after either of these bundles". +;; o "MIB BBB: Split issue after the first bundle in this pair". +(exclusion_set + "2b_0b.bb,2b_0bb.b,2b_0bbb.,2b_0m.bb,2b_0mb.b,2b_0mbb." + "2b_1m.ii,2b_1m.mi,2b_1m.fi,2b_1m.mf,2b_1b.bb,2b_1m.bb,\ + 2b_1m.ib,2b_1m.mb,2b_1m.fb,2b_1m.lx") +(exclusion_set "2b_0m.ib,2b_0mi.b,2b_0mib." "2b_1b.bb") + +;;; "MIB/MFB/MMB: Splits issue after any of these bundles unless the +;;; B-slot contains a nop.b or a brp instruction". +;;; "The B in an MIB/MFB/MMB bundle disperses to B0 if it is a brp or +;;; nop.b, otherwise it disperses to B2". 
+(final_absence_set + "2b_1m.ii, 2b_1m.mi, 2b_1m.fi, 2b_1m.mf, 2b_1b.bb, 2b_1m.bb,\ + 2b_1m.ib, 2b_1m.mb, 2b_1m.fb, 2b_1m.lx" + "2b_0mib. 2b_ub2, 2b_0mfb. 2b_ub2, 2b_0mmb. 2b_ub2") + +;; This is necessary to start new processor cycle when we meet stop bit. +(define_cpu_unit "2b_stop" "twob") +(final_absence_set + "2b_0m.ii,2b_0mi.i,2b_0mii.,2b_0m.mi,2b_0mm.i,2b_0mmi.,\ + 2b_0m.fi,2b_0mf.i,2b_0mfi.,\ + 2b_0m.mf,2b_0mm.f,2b_0mmf.,2b_0b.bb,2b_0bb.b,2b_0bbb.,\ + 2b_0m.bb,2b_0mb.b,2b_0mbb.,\ + 2b_0m.ib,2b_0mi.b,2b_0mib.,2b_0m.mb,2b_0mm.b,2b_0mmb.,\ + 2b_0m.fb,2b_0mf.b,2b_0mfb.,2b_0m.lx,2b_0mlx., \ + 2b_1m.ii,2b_1mi.i,2b_1mii.,2b_1m.mi,2b_1mm.i,2b_1mmi.,\ + 2b_1m.fi,2b_1mf.i,2b_1mfi.,\ + 2b_1m.mf,2b_1mm.f,2b_1mmf.,2b_1b.bb,2b_1bb.b,2b_1bbb.,\ + 2b_1m.bb,2b_1mb.b,2b_1mbb.,\ + 2b_1m.ib,2b_1mi.b,2b_1mib.,2b_1m.mb,2b_1mm.b,2b_1mmb.,\ + 2b_1m.fb,2b_1mf.b,2b_1mfb.,2b_1m.lx,2b_1mlx." + "2b_stop") + +;; The issue logic can reorder M slot insns between different subtypes +;; but can not reorder insn within the same subtypes. The following +;; constraint is enough to describe this. +(final_presence_set "2b_um1" "2b_um0") +(final_presence_set "2b_um3" "2b_um2") + +;; The insn in the 1st I slot of the two bundle issue group will issue +;; to I0. The second I slot insn will issue to I1. 
+(final_presence_set "2b_ui1" "2b_ui0") + +;; For exceptions of I insns: +(define_cpu_unit "2b_only_ui0" "twob") +(final_absence_set "2b_only_ui0" "2b_ui1") + +;; Insns + +(define_reservation "2b_M" + "((2b_0m.ii|2b_0m.mi|2b_0m.fi|2b_0m.mf|2b_0m.bb\ + |2b_0m.ib|2b_0m.mb|2b_0m.fb|2b_0m.lx)+2_1\ + |(2b_1m.ii|2b_1m.mi|2b_1m.fi|2b_1m.mf|2b_1m.bb\ + |2b_1m.ib|2b_1m.mb|2b_1m.fb|2b_1m.lx)+2_4\ + |(2b_0mm.i|2b_0mm.f|2b_0mm.b)+2_2\ + |(2b_1mm.i|2b_1mm.f|2b_1mm.b)+2_5)\ + +(2b_um0|2b_um1|2b_um2|2b_um3)") + +(define_reservation "2b_M_only_um0" + "((2b_0m.ii|2b_0m.mi|2b_0m.fi|2b_0m.mf|2b_0m.bb\ + |2b_0m.ib|2b_0m.mb|2b_0m.fb|2b_0m.lx)+2_1\ + |(2b_1m.ii|2b_1m.mi|2b_1m.fi|2b_1m.mf|2b_1m.bb\ + |2b_1m.ib|2b_1m.mb|2b_1m.fb|2b_1m.lx)+2_4\ + |(2b_0mm.i|2b_0mm.f|2b_0mm.b)+2_2\ + |(2b_1mm.i|2b_1mm.f|2b_1mm.b)+2_5)\ + +2b_um0") + +(define_reservation "2b_M_only_um2" + "((2b_0m.ii|2b_0m.mi|2b_0m.fi|2b_0m.mf|2b_0m.bb\ + |2b_0m.ib|2b_0m.mb|2b_0m.fb|2b_0m.lx)+2_1\ + |(2b_1m.ii|2b_1m.mi|2b_1m.fi|2b_1m.mf|2b_1m.bb\ + |2b_1m.ib|2b_1m.mb|2b_1m.fb|2b_1m.lx)+2_4\ + |(2b_0mm.i|2b_0mm.f|2b_0mm.b)+2_2\ + |(2b_1mm.i|2b_1mm.f|2b_1mm.b)+2_5)\ + +2b_um2") + +(define_reservation "2b_M_only_um01" + "((2b_0m.ii|2b_0m.mi|2b_0m.fi|2b_0m.mf|2b_0m.bb\ + |2b_0m.ib|2b_0m.mb|2b_0m.fb|2b_0m.lx)+2_1\ + |(2b_1m.ii|2b_1m.mi|2b_1m.fi|2b_1m.mf|2b_1m.bb\ + |2b_1m.ib|2b_1m.mb|2b_1m.fb|2b_1m.lx)+2_4\ + |(2b_0mm.i|2b_0mm.f|2b_0mm.b)+2_2\ + |(2b_1mm.i|2b_1mm.f|2b_1mm.b)+2_5)\ + +(2b_um0|2b_um1)") + +(define_reservation "2b_M_only_um23" + "((2b_0m.ii|2b_0m.mi|2b_0m.fi|2b_0m.mf|2b_0m.bb\ + |2b_0m.ib|2b_0m.mb|2b_0m.fb|2b_0m.lx)+2_1\ + |(2b_1m.ii|2b_1m.mi|2b_1m.fi|2b_1m.mf|2b_1m.bb\ + |2b_1m.ib|2b_1m.mb|2b_1m.fb|2b_1m.lx)+2_4\ + |(2b_0mm.i|2b_0mm.f|2b_0mm.b)+2_2\ + |(2b_1mm.i|2b_1mm.f|2b_1mm.b)+2_5)\ + +(2b_um2|2b_um3)") + +;; I instruction is dispersed to the lowest numbered I unit +;; not already in use. Remeber about possible spliting. 
+(define_reservation "2b_I" + "2b_0mi.i+2_2+2b_ui0|2b_0mii.+2_3+(2b_ui0|2b_ui1)|2b_0mmi.+2_3+2b_ui0\ + |2b_0mfi.+2_3+2b_ui0|2b_0mi.b+2_2+2b_ui0\ + |(2b_1mi.i+2_5|2b_1mi.b+2_5)+(2b_ui0|2b_ui1)\ + |(2b_1mii.|2b_1mmi.|2b_1mfi.)+2_6+(2b_ui0|2b_ui1)") + +;; "An F slot in the 1st bundle disperses to F0". +;; "An F slot in the 2st bundle disperses to F1". +(define_reservation "2b_F" + "2b_0mf.i+2_2+2b_uf0|2b_0mmf.+2_3+2b_uf0|2b_0mf.b+2_2+2b_uf0\ + |2b_1mf.i+2_5+2b_uf1|2b_1mmf.+2_6+2b_uf1|2b_1mf.b+2_5+2b_uf1") + +;;; "Each B slot in MBB or BBB bundle disperses to the corresponding B +;;; unit. That is, a B slot in 1st position is despersed to B0. In the +;;; 2nd position it is dispersed to B2". +(define_reservation "2b_NB" + "2b_0b.bb+2_1+2b_unb0|2b_0bb.b+2_2+2b_unb1|2b_0bbb.+2_3+2b_unb2\ + |2b_0mb.b+2_2+2b_unb1|2b_0mbb.+2_3+2b_unb2\ + |2b_0mib.+2_3+2b_unb0|2b_0mmb.+2_3+2b_unb0|2b_0mfb.+2_3+2b_unb0\ + |2b_1b.bb+2_4+2b_unb0|2b_1bb.b+2_5+2b_unb1\ + |2b_1bbb.+2_6+2b_unb2|2b_1mb.b+2_5+2b_unb1|2b_1mbb.+2_6+2b_unb2\ + |2b_1mib.+2_6+2b_unb0|2b_1mmb.+2_6+2b_unb0|2b_1mfb.+2_6+2b_unb0") + +(define_reservation "2b_B" + "2b_0b.bb+2_1+2b_ub0|2b_0bb.b+2_2+2b_ub1|2b_0bbb.+2_3+2b_ub2\ + |2b_0mb.b+2_2+2b_ub1|2b_0mbb.+2_3+2b_ub2|2b_0mib.+2_3+2b_ub2\ + |2b_0mfb.+2_3+2b_ub2|2b_1b.bb+2_4+2b_ub0|2b_1bb.b+2_5+2b_ub1\ + |2b_1bbb.+2_6+2b_ub2|2b_1mb.b+2_5+2b_ub1\ + |2b_1mib.+2_6+2b_ub2|2b_1mmb.+2_6+2b_ub2|2b_1mfb.+2_6+2b_ub2") + +;; For the MLI template, the I slot insn is always assigned to port I0 +;; if it is in the first bundle or it is assigned to port I1 if it is in +;; the second bundle. +(define_reservation "2b_L" + "2b_0mlx.+2_3+2b_ui0+2b_uf0|2b_1mlx.+2_6+2b_ui1+2b_uf1") + +;; Should we describe that A insn in I slot can be issued into M +;; ports? I think it is not necessary because of multipass +;; scheduling. 
For example, the multipass scheduling could use +;; MMI-MMI instead of MII-MII where the two last I slots contain A +;; insns (even if the case is complicated by use-def conflicts). +;; +;; In any case we could describe it as +;; (define_cpu_unit "2b_ui1_0pres,2b_ui1_1pres,2b_ui1_2pres,2b_ui1_3pres" +;; "twob") +;; (final_presence_set "2b_ui1_0pres,2b_ui1_1pres,2b_ui1_2pres,2b_ui1_3pres" +;; "2b_ui1") +;; (define_reservation "b_A" +;; "b_M|b_I\ +;; |(2b_1mi.i+2_5|2b_1mii.+2_6|2b_1mmi.+2_6|2b_1mfi.+2_6|2b_1mi.b+2_5)\ +;; +(2b_um0|2b_um1|2b_um2|2b_um3)\ +;; +(2b_ui1_0pres|2b_ui1_1pres|2b_ui1_2pres|2b_ui1_3pres)") + +(define_reservation "2b_A" "2b_M|2b_I") + +;; We assume that there is no insn issued on the same cycle as the +;; unknown insn. +(define_cpu_unit "2b_empty" "twob") +(exclusion_set "2b_empty" + "2b_0m.ii,2b_0m.mi,2b_0m.fi,2b_0m.mf,2b_0b.bb,2b_0m.bb,\ + 2b_0m.ib,2b_0m.mb,2b_0m.fb,2b_0m.lx,2b_0mm.i") + +(define_cpu_unit + "2b_0m_bs, 2b_0mi_bs, 2b_0mm_bs, 2b_0mf_bs, 2b_0b_bs, 2b_0bb_bs, 2b_0mb_bs" + "twob") +(define_cpu_unit + "2b_1m_bs, 2b_1mi_bs, 2b_1mm_bs, 2b_1mf_bs, 2b_1b_bs, 2b_1bb_bs, 2b_1mb_bs" + "twob") + +(define_cpu_unit "2b_m_cont, 2b_mi_cont, 2b_mm_cont, 2b_mf_cont, 2b_mb_cont,\ + 2b_b_cont, 2b_bb_cont" "twob") + +;; For stop in the middle of the bundles. 
+(define_cpu_unit "2b_m_stop, 2b_m0_stop, 2b_m1_stop, 2b_0mmi_cont" "twob") +(define_cpu_unit "2b_mi_stop, 2b_mi0_stop, 2b_mi1_stop, 2b_0mii_cont" "twob") + +(final_presence_set "2b_0m_bs" + "2b_0m.ii, 2b_0m.mi, 2b_0m.mf, 2b_0m.fi, 2b_0m.bb,\ + 2b_0m.ib, 2b_0m.fb, 2b_0m.mb, 2b_0m.lx") +(final_presence_set "2b_1m_bs" + "2b_1m.ii, 2b_1m.mi, 2b_1m.mf, 2b_1m.fi, 2b_1m.bb,\ + 2b_1m.ib, 2b_1m.fb, 2b_1m.mb, 2b_1m.lx") +(final_presence_set "2b_0mi_bs" "2b_0mi.i, 2b_0mi.i") +(final_presence_set "2b_1mi_bs" "2b_1mi.i, 2b_1mi.i") +(final_presence_set "2b_0mm_bs" "2b_0mm.i, 2b_0mm.f, 2b_0mm.b") +(final_presence_set "2b_1mm_bs" "2b_1mm.i, 2b_1mm.f, 2b_1mm.b") +(final_presence_set "2b_0mf_bs" "2b_0mf.i, 2b_0mf.b") +(final_presence_set "2b_1mf_bs" "2b_1mf.i, 2b_1mf.b") +(final_presence_set "2b_0b_bs" "2b_0b.bb") +(final_presence_set "2b_1b_bs" "2b_1b.bb") +(final_presence_set "2b_0bb_bs" "2b_0bb.b") +(final_presence_set "2b_1bb_bs" "2b_1bb.b") +(final_presence_set "2b_0mb_bs" "2b_0mb.b") +(final_presence_set "2b_1mb_bs" "2b_1mb.b") + +(exclusion_set "2b_0m_bs" + "2b_0mi.i, 2b_0mm.i, 2b_0mm.f, 2b_0mf.i, 2b_0mb.b,\ + 2b_0mi.b, 2b_0mf.b, 2b_0mm.b, 2b_0mlx., 2b_m0_stop") +(exclusion_set "2b_1m_bs" + "2b_1mi.i, 2b_1mm.i, 2b_1mm.f, 2b_1mf.i, 2b_1mb.b,\ + 2b_1mi.b, 2b_1mf.b, 2b_1mm.b, 2b_1mlx., 2b_m1_stop") +(exclusion_set "2b_0mi_bs" "2b_0mii., 2b_0mib., 2b_mi0_stop") +(exclusion_set "2b_1mi_bs" "2b_1mii., 2b_1mib., 2b_mi1_stop") +(exclusion_set "2b_0mm_bs" "2b_0mmi., 2b_0mmf., 2b_0mmb.") +(exclusion_set "2b_1mm_bs" "2b_1mmi., 2b_1mmf., 2b_1mmb.") +(exclusion_set "2b_0mf_bs" "2b_0mfi., 2b_0mfb.") +(exclusion_set "2b_1mf_bs" "2b_1mfi., 2b_1mfb.") +(exclusion_set "2b_0b_bs" "2b_0bb.b") +(exclusion_set "2b_1b_bs" "2b_1bb.b") +(exclusion_set "2b_0bb_bs" "2b_0bbb.") +(exclusion_set "2b_1bb_bs" "2b_1bbb.") +(exclusion_set "2b_0mb_bs" "2b_0mbb.") +(exclusion_set "2b_1mb_bs" "2b_1mbb.") + +(exclusion_set + "2b_0m_bs, 2b_0mi_bs, 2b_0mm_bs, 2b_0mf_bs, 2b_0b_bs, 2b_0bb_bs, 2b_0mb_bs, + 2b_1m_bs, 
2b_1mi_bs, 2b_1mm_bs, 2b_1mf_bs, 2b_1b_bs, 2b_1bb_bs, 2b_1mb_bs" + "2b_stop") + +(final_presence_set + "2b_0mi.i, 2b_0mm.i, 2b_0mf.i, 2b_0mm.f, 2b_0mb.b,\ + 2b_0mi.b, 2b_0mm.b, 2b_0mf.b, 2b_0mlx." + "2b_m_cont") +(final_presence_set "2b_0mii., 2b_0mib." "2b_mi_cont") +(final_presence_set "2b_0mmi., 2b_0mmf., 2b_0mmb." "2b_mm_cont") +(final_presence_set "2b_0mfi., 2b_0mfb." "2b_mf_cont") +(final_presence_set "2b_0bb.b" "2b_b_cont") +(final_presence_set "2b_0bbb." "2b_bb_cont") +(final_presence_set "2b_0mbb." "2b_mb_cont") + +(exclusion_set + "2b_0m.ii, 2b_0m.mi, 2b_0m.fi, 2b_0m.mf, 2b_0b.bb, 2b_0m.bb,\ + 2b_0m.ib, 2b_0m.mb, 2b_0m.fb, 2b_0m.lx" + "2b_m_cont, 2b_mi_cont, 2b_mm_cont, 2b_mf_cont,\ + 2b_mb_cont, 2b_b_cont, 2b_bb_cont") + +(exclusion_set "2b_empty" + "2b_m_cont,2b_mi_cont,2b_mm_cont,2b_mf_cont,\ + 2b_mb_cont,2b_b_cont,2b_bb_cont") + +;; For m;mi bundle +(final_presence_set "2b_m0_stop" "2b_0m.mi") +(final_presence_set "2b_0mm.i" "2b_0mmi_cont") +(exclusion_set "2b_0mmi_cont" + "2b_0m.ii, 2b_0m.mi, 2b_0m.fi, 2b_0m.mf, 2b_0b.bb, 2b_0m.bb,\ + 2b_0m.ib, 2b_0m.mb, 2b_0m.fb, 2b_0m.lx") +(exclusion_set "2b_m0_stop" "2b_0mm.i") +(final_presence_set "2b_m1_stop" "2b_1m.mi") +(exclusion_set "2b_m1_stop" "2b_1mm.i") +(final_presence_set "2b_m_stop" "2b_m0_stop, 2b_m1_stop") + +;; For mi;i bundle +(final_presence_set "2b_mi0_stop" "2b_0mi.i") +(final_presence_set "2b_0mii." 
"2b_0mii_cont") +(exclusion_set "2b_0mii_cont" + "2b_0m.ii, 2b_0m.mi, 2b_0m.fi, 2b_0m.mf, 2b_0b.bb, 2b_0m.bb,\ + 2b_0m.ib, 2b_0m.mb, 2b_0m.fb, 2b_0m.lx") +(exclusion_set "2b_mi0_stop" "2b_0mii.") +(final_presence_set "2b_mi1_stop" "2b_1mi.i") +(exclusion_set "2b_mi1_stop" "2b_1mii.") +(final_presence_set "2b_mi_stop" "2b_mi0_stop, 2b_mi1_stop") + +(final_absence_set + "2b_0m.ii,2b_0mi.i,2b_0mii.,2b_0m.mi,2b_0mm.i,2b_0mmi.,\ + 2b_0m.fi,2b_0mf.i,2b_0mfi.,2b_0m.mf,2b_0mm.f,2b_0mmf.,\ + 2b_0b.bb,2b_0bb.b,2b_0bbb.,2b_0m.bb,2b_0mb.b,2b_0mbb.,\ + 2b_0m.ib,2b_0mi.b,2b_0mib.,2b_0m.mb,2b_0mm.b,2b_0mmb.,\ + 2b_0m.fb,2b_0mf.b,2b_0mfb.,2b_0m.lx,2b_0mlx., \ + 2b_1m.ii,2b_1mi.i,2b_1mii.,2b_1m.mi,2b_1mm.i,2b_1mmi.,\ + 2b_1m.fi,2b_1mf.i,2b_1mfi.,2b_1m.mf,2b_1mm.f,2b_1mmf.,\ + 2b_1b.bb,2b_1bb.b,2b_1bbb.,2b_1m.bb,2b_1mb.b,2b_1mbb.,\ + 2b_1m.ib,2b_1mi.b,2b_1mib.,2b_1m.mb,2b_1mm.b,2b_1mmb.,\ + 2b_1m.fb,2b_1mf.b,2b_1mfb.,2b_1m.lx,2b_1mlx." + "2b_m0_stop,2b_m1_stop,2b_mi0_stop,2b_mi1_stop") + +(define_insn_reservation "2b_stop_bit" 0 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "stop_bit")) + (ne (symbol_ref "bundling_p") (const_int 0))) + "2b_stop|2b_m0_stop|2b_m1_stop|2b_mi0_stop|2b_mi1_stop") +(define_insn_reservation "2b_br" 0 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "br")) + (ne (symbol_ref "bundling_p") (const_int 0))) "2b_B") +(define_insn_reservation "2b_scall" 0 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "scall")) + (ne (symbol_ref "bundling_p") (const_int 0))) "2b_B") +(define_insn_reservation "2b_fcmp" 2 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "fcmp")) + (ne (symbol_ref "bundling_p") (const_int 0))) "2b_F") +(define_insn_reservation "2b_fcvtfx" 4 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "fcvtfx")) + (ne (symbol_ref "bundling_p") (const_int 0))) "2b_F") +(define_insn_reservation "2b_fld" 6 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" 
"fld")) + (ne (symbol_ref "bundling_p") (const_int 0))) "2b_M") +(define_insn_reservation "2b_fmac" 4 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "fmac")) + (ne (symbol_ref "bundling_p") (const_int 0))) "2b_F") +(define_insn_reservation "2b_fmisc" 4 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "fmisc")) + (ne (symbol_ref "bundling_p") (const_int 0))) "2b_F") + +;; Latency time ??? +(define_insn_reservation "2b_frar_i" 13 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "frar_i")) + (ne (symbol_ref "bundling_p") (const_int 0))) + "2b_I+2b_only_ui0") +;; Latency time ??? +(define_insn_reservation "2b_frar_m" 6 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "frar_m")) + (ne (symbol_ref "bundling_p") (const_int 0))) + "2b_M_only_um2") +(define_insn_reservation "2b_frbr" 2 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "frbr")) + (ne (symbol_ref "bundling_p") (const_int 0))) + "2b_I+2b_only_ui0") +(define_insn_reservation "2b_frfr" 5 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "frfr")) + (ne (symbol_ref "bundling_p") (const_int 0))) + "2b_M_only_um2") +(define_insn_reservation "2b_frpr" 2 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "frpr")) + (ne (symbol_ref "bundling_p") (const_int 0))) + "2b_I+2b_only_ui0") + +(define_insn_reservation "2b_ialu" 1 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "ialu")) + (ne (symbol_ref "bundling_p") (const_int 0))) + "2b_A") +(define_insn_reservation "2b_icmp" 1 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "icmp")) + (ne (symbol_ref "bundling_p") (const_int 0))) "2b_A") +(define_insn_reservation "2b_ilog" 1 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "ilog")) + (ne (symbol_ref "bundling_p") (const_int 0))) "2b_A") +;; Latency time ??? 
;; Itanium 2 DFA reservations, "2b_" (bundling) variants, for insn classes
;; ishf .. xmpy.  Each define_insn_reservation gives (name, latency,
;; condition, reservation-string).  All are gated on cpu == "itanium2" AND
;; bundling_p != 0, i.e. they are used only by the insn-bundling pass, not
;; by the ordinary second scheduling pass (see bundling_p in ia64.c).
;; Reservation strings name functional-unit regexps declared elsewhere in
;; this file (2b_I, 2b_M_only_um01, 2b_only_ui0, ...).  Latencies marked
;; ";; Latency time ???" in the original are unverified by the author.
;; NOTE(review): this text is diff residue ('+' prefixes from the patch
;; rendering are embedded verbatim) — kept byte-identical below.
+(define_insn_reservation "2b_ishf" 1 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "ishf")) + (ne (symbol_ref "bundling_p") (const_int 0))) + "2b_I+2b_only_ui0") +(define_insn_reservation "2b_ld" 1 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "ld")) + (ne (symbol_ref "bundling_p") (const_int 0))) + "2b_M_only_um01") +(define_insn_reservation "2b_long_i" 1 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "long_i")) + (ne (symbol_ref "bundling_p") (const_int 0))) "2b_L") + +;; Latency time ??? +(define_insn_reservation "2b_mmmul" 2 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "mmmul")) + (ne (symbol_ref "bundling_p") (const_int 0))) + "2b_I+2b_only_ui0") +;; Latency time ??? +(define_insn_reservation "2b_mmshf" 2 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "mmshf")) + (ne (symbol_ref "bundling_p") (const_int 0))) "2b_I") +;; Latency time ??? +(define_insn_reservation "2b_mmshfi" 1 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "mmshfi")) + (ne (symbol_ref "bundling_p") (const_int 0))) "2b_I") + +(define_insn_reservation "2b_rse_m" 0 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "rse_m")) + (ne (symbol_ref "bundling_p") (const_int 0))) + "(2b_0m.ii|2b_0m.mi|2b_0m.fi|2b_0m.mf|2b_0m.bb\ + |2b_0m.ib|2b_0m.mb|2b_0m.fb|2b_0m.lx)+2_1+2b_um0") +(define_insn_reservation "2b_sem" 0 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "sem")) + (ne (symbol_ref "bundling_p") (const_int 0))) + "2b_M_only_um23") + +(define_insn_reservation "2b_stf" 1 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "stf")) + (ne (symbol_ref "bundling_p") (const_int 0))) + "2b_M_only_um23") +(define_insn_reservation "2b_st" 1 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "st")) + (ne (symbol_ref "bundling_p") (const_int 0))) + "2b_M_only_um23") +(define_insn_reservation "2b_syst_m0" 0 + (and (and (eq_attr "cpu"
"itanium2") + (eq_attr "itanium_class" "syst_m0")) + (ne (symbol_ref "bundling_p") (const_int 0))) + "2b_M_only_um2") +(define_insn_reservation "2b_syst_m" 0 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "syst_m")) + (ne (symbol_ref "bundling_p") (const_int 0))) + "2b_M_only_um0") +;; Reservation??? +(define_insn_reservation "2b_tbit" 1 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "tbit")) + (ne (symbol_ref "bundling_p") (const_int 0))) + "2b_I+2b_only_ui0") +(define_insn_reservation "2b_toar_i" 0 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "toar_i")) + (ne (symbol_ref "bundling_p") (const_int 0))) + "2b_I+2b_only_ui0") +;; Latency time ??? +(define_insn_reservation "2b_toar_m" 5 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "toar_m")) + (ne (symbol_ref "bundling_p") (const_int 0))) + "2b_M_only_um2") +;; Latency time ??? +(define_insn_reservation "2b_tobr" 1 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "tobr")) + (ne (symbol_ref "bundling_p") (const_int 0))) + "2b_I+2b_only_ui0") +(define_insn_reservation "2b_tofr" 5 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "tofr")) + (ne (symbol_ref "bundling_p") (const_int 0))) + "2b_M_only_um23") +;; Latency time ??? +(define_insn_reservation "2b_topr" 1 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "topr")) + (ne (symbol_ref "bundling_p") (const_int 0))) + "2b_I+2b_only_ui0") + +(define_insn_reservation "2b_xmpy" 4 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "xmpy")) + (ne (symbol_ref "bundling_p") (const_int 0))) "2b_F") +;; Latency time ???
;; Itanium 2 "2b_" (bundling-pass) reservations, continued: insn classes
;; xtd .. pre_cycle.  Same structure as above: each form is conditioned on
;; cpu == "itanium2" and bundling_p != 0.  The tail group covers the nop /
;; unknown / ignore pseudo-classes the bundler inserts ("2b_ignore" reserves
;; "nothing", i.e. consumes no functional units).  The final "2b_pre_cycle"
;; reservation enumerates, as alternatives, every (bundle-start, continuation)
;; pair the bundler may be in at a cycle boundary — presumably modeling the
;; per-cycle automaton advance; confirm against the 2b_*_bs/2b_*_cont unit
;; declarations earlier in itanium2.md (not visible here).
;; NOTE(review): diff residue ('+' prefixes) preserved byte-identically.
+(define_insn_reservation "2b_xtd" 1 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "xtd")) + (ne (symbol_ref "bundling_p") (const_int 0))) "2b_I") +(define_insn_reservation "2b_chk_s" 0 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "chk_s")) + (ne (symbol_ref "bundling_p") (const_int 0))) + "2b_I|2b_M_only_um23") +(define_insn_reservation "2b_lfetch" 0 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "lfetch")) + (ne (symbol_ref "bundling_p") (const_int 0))) + "2b_M_only_um01") +(define_insn_reservation "2b_nop_m" 0 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "nop_m")) + (ne (symbol_ref "bundling_p") (const_int 0))) "2b_M") +(define_insn_reservation "2b_nop_b" 0 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "nop_b")) + (ne (symbol_ref "bundling_p") (const_int 0))) "2b_NB") +(define_insn_reservation "2b_nop_i" 0 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "nop_i")) + (ne (symbol_ref "bundling_p") (const_int 0))) "2b_I") +(define_insn_reservation "2b_nop_f" 0 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "nop_f")) + (ne (symbol_ref "bundling_p") (const_int 0))) "2b_F") +(define_insn_reservation "2b_nop_x" 0 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "nop_x")) + (ne (symbol_ref "bundling_p") (const_int 0))) "2b_L") +(define_insn_reservation "2b_unknown" 1 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "unknown")) + (ne (symbol_ref "bundling_p") (const_int 0))) "2b_empty") +(define_insn_reservation "2b_nop" 0 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "nop")) + (ne (symbol_ref "bundling_p") (const_int 0))) + "2b_M|2b_NB|2b_I|2b_F") +(define_insn_reservation "2b_ignore" 0 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "ignore")) + (ne (symbol_ref "bundling_p") (const_int 0))) "nothing") + +(define_insn_reservation "2b_pre_cycle" 0 + (and (and
(eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "pre_cycle")) + (ne (symbol_ref "bundling_p") (const_int 0))) + "(2b_0m_bs, 2b_m_cont) \ + | (2b_0mi_bs, 2b_mi_cont) \ + | (2b_0mm_bs, 2b_mm_cont) \ + | (2b_0mf_bs, 2b_mf_cont) \ + | (2b_0b_bs, 2b_b_cont) \ + | (2b_0bb_bs, 2b_bb_cont) \ + | (2b_0mb_bs, 2b_mb_cont) \ + | (2b_1m_bs, 2b_m_cont) \ + | (2b_1mi_bs, 2b_mi_cont) \ + | (2b_1mm_bs, 2b_mm_cont) \ + | (2b_1mf_bs, 2b_mf_cont) \ + | (2b_1b_bs, 2b_b_cont) \ + | (2b_1bb_bs, 2b_bb_cont) \ + | (2b_1mb_bs, 2b_mb_cont) \ + | (2b_m_stop, 2b_0mmi_cont) \ + | (2b_mi_stop, 2b_0mii_cont)") + |