Diffstat (limited to 'gcc/config/ia64/ia64.c')
-rw-r--r--  gcc/config/ia64/ia64.c | 2665
1 file changed, 1378 insertions, 1287 deletions
diff --git a/gcc/config/ia64/ia64.c b/gcc/config/ia64/ia64.c
index 3009d1c1200..039d11aaac7 100644
--- a/gcc/config/ia64/ia64.c
+++ b/gcc/config/ia64/ia64.c
@@ -47,6 +47,7 @@ Boston, MA 02111-1307, USA. */
#include "target.h"
#include "target-def.h"
#include "tm_p.h"
+#include "hashtab.h"
/* This is used for communication between ASM_OUTPUT_LABEL and
ASM_OUTPUT_LABELREF. */
@@ -103,6 +104,12 @@ int ia64_tls_size = 22;
/* String used with the -mtls-size= option. */
const char *ia64_tls_size_string;
+/* Which cpu we are scheduling for. */
+enum processor_type ia64_tune;
+
+/* String used with the -tune= option. */
+const char *ia64_tune_string;
+
/* Determines whether we run our final scheduling pass or not. We always
avoid the normal second scheduling pass. */
static int ia64_flag_schedule_insns2;
@@ -111,7 +118,19 @@ static int ia64_flag_schedule_insns2;
sections. */
unsigned int ia64_section_threshold;
+
+/* The following variable is used by the DFA insn scheduler. The value is
+ TRUE if we do insn bundling instead of insn scheduling. */
+int bundling_p = 0;
+
+static int ia64_use_dfa_pipeline_interface PARAMS ((void));
+static int ia64_first_cycle_multipass_dfa_lookahead PARAMS ((void));
+static void ia64_dependencies_evaluation_hook PARAMS ((rtx, rtx));
+static void ia64_init_dfa_pre_cycle_insn PARAMS ((void));
+static rtx ia64_dfa_pre_cycle_insn PARAMS ((void));
+static int ia64_first_cycle_multipass_dfa_lookahead_guard PARAMS ((rtx));
+static int ia64_dfa_new_cycle PARAMS ((FILE *, int, rtx, int, int, int *));
static rtx gen_tls_get_addr PARAMS ((void));
static rtx gen_thread_pointer PARAMS ((void));
static int find_gr_spill PARAMS ((int));
@@ -132,6 +151,7 @@ static void fix_range PARAMS ((const char *));
static struct machine_function * ia64_init_machine_status PARAMS ((void));
static void emit_insn_group_barriers PARAMS ((FILE *, rtx));
static void emit_all_insn_group_barriers PARAMS ((FILE *, rtx));
+static void final_emit_insn_group_barriers PARAMS ((FILE *));
static void emit_predicate_relation_info PARAMS ((void));
static bool ia64_in_small_data_p PARAMS ((tree));
static void ia64_encode_section_info PARAMS ((tree, int));
@@ -157,12 +177,31 @@ static int ia64_issue_rate PARAMS ((void));
static int ia64_adjust_cost PARAMS ((rtx, rtx, rtx, int));
static void ia64_sched_init PARAMS ((FILE *, int, int));
static void ia64_sched_finish PARAMS ((FILE *, int));
-static int ia64_internal_sched_reorder PARAMS ((FILE *, int, rtx *,
- int *, int, int));
+static int ia64_dfa_sched_reorder PARAMS ((FILE *, int, rtx *, int *,
+ int, int));
static int ia64_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
static int ia64_sched_reorder2 PARAMS ((FILE *, int, rtx *, int *, int));
static int ia64_variable_issue PARAMS ((FILE *, int, rtx, int));
+static struct bundle_state *get_free_bundle_state PARAMS ((void));
+static void free_bundle_state PARAMS ((struct bundle_state *));
+static void initiate_bundle_states PARAMS ((void));
+static void finish_bundle_states PARAMS ((void));
+static unsigned bundle_state_hash PARAMS ((const void *));
+static int bundle_state_eq_p PARAMS ((const void *, const void *));
+static int insert_bundle_state PARAMS ((struct bundle_state *));
+static void initiate_bundle_state_table PARAMS ((void));
+static void finish_bundle_state_table PARAMS ((void));
+static int try_issue_nops PARAMS ((struct bundle_state *, int));
+static int try_issue_insn PARAMS ((struct bundle_state *, rtx));
+static void issue_nops_and_insn PARAMS ((struct bundle_state *, int,
+ rtx, int));
+static int get_max_pos PARAMS ((state_t));
+static int get_template PARAMS ((state_t, int));
+
+static rtx get_next_important_insn PARAMS ((rtx, rtx));
+static void bundling PARAMS ((FILE *, int, rtx, rtx));
+
static void ia64_output_mi_thunk PARAMS ((FILE *, tree, HOST_WIDE_INT,
HOST_WIDE_INT, tree));
@@ -244,6 +283,27 @@ static const struct attribute_spec ia64_attribute_table[] =
#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 ia64_sched_reorder2
+#undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
+#define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK ia64_dependencies_evaluation_hook
+
+#undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
+#define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE ia64_use_dfa_pipeline_interface
+
+#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
+#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD ia64_first_cycle_multipass_dfa_lookahead
+
+#undef TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN
+#define TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN ia64_init_dfa_pre_cycle_insn
+#undef TARGET_SCHED_DFA_PRE_CYCLE_INSN
+#define TARGET_SCHED_DFA_PRE_CYCLE_INSN ia64_dfa_pre_cycle_insn
+
+#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
+#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD\
+ ia64_first_cycle_multipass_dfa_lookahead_guard
+
+#undef TARGET_SCHED_DFA_NEW_CYCLE
+#define TARGET_SCHED_DFA_NEW_CYCLE ia64_dfa_new_cycle
+
#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
@@ -4209,6 +4269,23 @@ ia64_init_machine_status ()
void
ia64_override_options ()
{
+ static struct pta
+ {
+ const char *const name; /* processor name or nickname. */
+ const enum processor_type processor;
+ }
+ const processor_alias_table[] =
+ {
+ {"itanium", PROCESSOR_ITANIUM},
+ {"itanium1", PROCESSOR_ITANIUM},
+ {"merced", PROCESSOR_ITANIUM},
+ {"itanium2", PROCESSOR_ITANIUM2},
+ {"mckinley", PROCESSOR_ITANIUM2},
+ };
+
+ int const pta_size = ARRAY_SIZE (processor_alias_table);
+ int i;
+
if (TARGET_AUTO_PIC)
target_flags |= MASK_CONST_GP;
@@ -4237,6 +4314,19 @@ ia64_override_options ()
ia64_tls_size = tmp;
}
+ if (!ia64_tune_string)
+ ia64_tune_string = "itanium2";
+
+ for (i = 0; i < pta_size; i++)
+ if (! strcmp (ia64_tune_string, processor_alias_table[i].name))
+ {
+ ia64_tune = processor_alias_table[i].processor;
+ break;
+ }
+
+ if (i == pta_size)
+ error ("bad value (%s) for -tune= switch", ia64_tune_string);
+
ia64_flag_schedule_insns2 = flag_schedule_insns_after_reload;
flag_schedule_insns_after_reload = 0;
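The -tune= handling added above is a plain linear scan of the alias table. A minimal standalone sketch of the same lookup, showing how e.g. "mckinley" resolves to PROCESSOR_ITANIUM2; the enum members and table entries mirror the hunk, while the driver code around them is illustrative only and not part of the patch:

#include <stdio.h>
#include <string.h>

enum processor_type { PROCESSOR_ITANIUM, PROCESSOR_ITANIUM2 };

static const struct { const char *name; enum processor_type processor; }
  alias_table[] =
{
  {"itanium",  PROCESSOR_ITANIUM},
  {"itanium1", PROCESSOR_ITANIUM},
  {"merced",   PROCESSOR_ITANIUM},
  {"itanium2", PROCESSOR_ITANIUM2},
  {"mckinley", PROCESSOR_ITANIUM2},
};

int
main (void)
{
  const char *tune = "mckinley";   /* value a -tune= switch would supply */
  size_t i, n = sizeof alias_table / sizeof alias_table[0];

  /* Linear scan, exactly like the loop added in the hunk above.  */
  for (i = 0; i < n; i++)
    if (! strcmp (tune, alias_table[i].name))
      break;
  if (i == n)
    printf ("bad value (%s) for -tune= switch\n", tune);
  else
    printf ("tuning for processor %d\n", (int) alias_table[i].processor);
  return 0;
}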
@@ -4249,20 +4339,9 @@ ia64_override_options ()
real_format_for_mode[TFmode - QFmode] = &ieee_extended_intel_128_format;
}
-static enum attr_itanium_requires_unit0 ia64_safe_itanium_requires_unit0 PARAMS((rtx));
static enum attr_itanium_class ia64_safe_itanium_class PARAMS((rtx));
static enum attr_type ia64_safe_type PARAMS((rtx));
-static enum attr_itanium_requires_unit0
-ia64_safe_itanium_requires_unit0 (insn)
- rtx insn;
-{
- if (recog_memoized (insn) >= 0)
- return get_attr_itanium_requires_unit0 (insn);
- else
- return ITANIUM_REQUIRES_UNIT0_NO;
-}
-
static enum attr_itanium_class
ia64_safe_itanium_class (insn)
rtx insn;
@@ -5096,7 +5175,10 @@ group_barrier_needed_p (insn)
abort ();
}
- if (first_instruction)
+ if (first_instruction && INSN_P (insn)
+ && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
+ && GET_CODE (PATTERN (insn)) != USE
+ && GET_CODE (PATTERN (insn)) != CLOBBER)
{
need_barrier = 0;
first_instruction = 0;
@@ -5229,6 +5311,7 @@ emit_all_insn_group_barriers (dump, insns)
}
}
}
+
static int errata_find_address_regs PARAMS ((rtx *, void *));
static void errata_emit_nops PARAMS ((rtx));
@@ -5374,92 +5457,92 @@ fixup_errata ()
}
}
-/* Instruction scheduling support. */
-/* Describe one bundle. */
-struct bundle
-{
- /* Zero if there's no possibility of a stop in this bundle other than
- at the end, otherwise the position of the optional stop bit. */
- int possible_stop;
- /* The types of the three slots. */
- enum attr_type t[3];
- /* The pseudo op to be emitted into the assembler output. */
- const char *name;
-};
+/* Instruction scheduling support. */
#define NR_BUNDLES 10
-/* A list of all available bundles. */
+/* A list of names of all available bundles. */
-static const struct bundle bundle[NR_BUNDLES] =
+static const char *bundle_name [NR_BUNDLES] =
{
- { 2, { TYPE_M, TYPE_I, TYPE_I }, ".mii" },
- { 1, { TYPE_M, TYPE_M, TYPE_I }, ".mmi" },
- { 0, { TYPE_M, TYPE_F, TYPE_I }, ".mfi" },
- { 0, { TYPE_M, TYPE_M, TYPE_F }, ".mmf" },
+ ".mii",
+ ".mmi",
+ ".mfi",
+ ".mmf",
#if NR_BUNDLES == 10
- { 0, { TYPE_B, TYPE_B, TYPE_B }, ".bbb" },
- { 0, { TYPE_M, TYPE_B, TYPE_B }, ".mbb" },
+ ".bbb",
+ ".mbb",
#endif
- { 0, { TYPE_M, TYPE_I, TYPE_B }, ".mib" },
- { 0, { TYPE_M, TYPE_M, TYPE_B }, ".mmb" },
- { 0, { TYPE_M, TYPE_F, TYPE_B }, ".mfb" },
- /* .mfi needs to occur earlier than .mlx, so that we only generate it if
- it matches an L type insn. Otherwise we'll try to generate L type
- nops. */
- { 0, { TYPE_M, TYPE_L, TYPE_X }, ".mlx" }
+ ".mib",
+ ".mmb",
+ ".mfb",
+ ".mlx"
};
-/* Describe a packet of instructions. Packets consist of two bundles that
- are visible to the hardware in one scheduling window. */
+/* Nonzero if we should insert stop bits into the schedule. */
-struct ia64_packet
-{
- const struct bundle *t1, *t2;
- /* Precomputed value of the first split issue in this packet if a cycle
- starts at its beginning. */
- int first_split;
- /* For convenience, the insn types are replicated here so we don't have
- to go through T1 and T2 all the time. */
- enum attr_type t[6];
-};
+int ia64_final_schedule = 0;
-/* An array containing all possible packets. */
-#define NR_PACKETS (NR_BUNDLES * NR_BUNDLES)
-static struct ia64_packet packets[NR_PACKETS];
+/* Codes of the corresponding queried units: */
-/* Map attr_type to a string with the name. */
+static int _0mii_, _0mmi_, _0mfi_, _0mmf_;
+static int _0bbb_, _0mbb_, _0mib_, _0mmb_, _0mfb_, _0mlx_;
-static const char *const type_names[] =
-{
- "UNKNOWN", "A", "I", "M", "F", "B", "L", "X", "S"
-};
+static int _1mii_, _1mmi_, _1mfi_, _1mmf_;
+static int _1bbb_, _1mbb_, _1mib_, _1mmb_, _1mfb_, _1mlx_;
-/* Nonzero if we should insert stop bits into the schedule. */
-int ia64_final_schedule = 0;
+static int pos_1, pos_2, pos_3, pos_4, pos_5, pos_6;
+
+/* The following variable value is an insn group barrier. */
+
+static rtx dfa_stop_insn;
+
+/* The following variable value is the last issued insn. */
+
+static rtx last_scheduled_insn;
+
+/* The following variable value is the size of the DFA state. */
+
+static size_t dfa_state_size;
+
+/* The following variable value is a pointer to a DFA state used as
+ a temporary variable. */
+
+static state_t temp_dfa_state = NULL;
+
+/* The following variable value is DFA state after issuing the last
+ insn. */
+
+static state_t prev_cycle_state = NULL;
+
+/* The following array element values are TRUE if the corresponding
+ insn requires adding stop bits before it. */
+
+static char *stops_p;
+
+/* The following variable is used to set up the array mentioned above. */
+
+static int stop_before_p = 0;
+
+/* The following variable value is the length of the arrays `clocks' and
+ `add_cycles'. */
+
+static int clocks_length;
+
+/* The following array element values are cycles on which the
+ corresponding insn will be issued. The array is used only for
+ Itanium1. */
+
+static int *clocks;
+
+/* The following array element values are the numbers of cycles that should
+ be added to improve insn scheduling of MM_insns for Itanium1. */
+
+static int *add_cycles;
-static int itanium_split_issue PARAMS ((const struct ia64_packet *, int));
static rtx ia64_single_set PARAMS ((rtx));
-static int insn_matches_slot PARAMS ((const struct ia64_packet *, enum attr_type, int, rtx));
static void ia64_emit_insn_before PARAMS ((rtx, rtx));
-static void maybe_rotate PARAMS ((FILE *));
-static void finish_last_head PARAMS ((FILE *, int));
-static void rotate_one_bundle PARAMS ((FILE *));
-static void rotate_two_bundles PARAMS ((FILE *));
-static void nop_cycles_until PARAMS ((int, FILE *));
-static void cycle_end_fill_slots PARAMS ((FILE *));
-static int packet_matches_p PARAMS ((const struct ia64_packet *, int, int *));
-static int get_split PARAMS ((const struct ia64_packet *, int));
-static int find_best_insn PARAMS ((rtx *, enum attr_type *, int,
- const struct ia64_packet *, int));
-static void find_best_packet PARAMS ((int *, const struct ia64_packet **,
- rtx *, enum attr_type *, int));
-static int itanium_reorder PARAMS ((FILE *, rtx *, rtx *, int));
-static void dump_current_packet PARAMS ((FILE *));
-static void schedule_stop PARAMS ((FILE *));
-static rtx gen_nop_type PARAMS ((enum attr_type));
-static void ia64_emit_nops PARAMS ((void));
/* Map a bundle number to its pseudo-op. */
@@ -5467,55 +5550,9 @@ const char *
get_bundle_name (b)
int b;
{
- return bundle[b].name;
+ return bundle_name[b];
}
-/* Compute the slot which will cause a split issue in packet P if the
- current cycle begins at slot BEGIN. */
-
-static int
-itanium_split_issue (p, begin)
- const struct ia64_packet *p;
- int begin;
-{
- int type_count[TYPE_S];
- int i;
- int split = 6;
-
- if (begin < 3)
- {
- /* Always split before and after MMF. */
- if (p->t[0] == TYPE_M && p->t[1] == TYPE_M && p->t[2] == TYPE_F)
- return 3;
- if (p->t[3] == TYPE_M && p->t[4] == TYPE_M && p->t[5] == TYPE_F)
- return 3;
- /* Always split after MBB and BBB. */
- if (p->t[1] == TYPE_B)
- return 3;
- /* Split after first bundle in MIB BBB combination. */
- if (p->t[2] == TYPE_B && p->t[3] == TYPE_B)
- return 3;
- }
-
- memset (type_count, 0, sizeof type_count);
- for (i = begin; i < split; i++)
- {
- enum attr_type t0 = p->t[i];
- /* An MLX bundle reserves the same units as an MFI bundle. */
- enum attr_type t = (t0 == TYPE_L ? TYPE_F
- : t0 == TYPE_X ? TYPE_I
- : t0);
-
- /* Itanium can execute up to 3 branches, 2 floating point, 2 memory, and
- 2 integer per cycle. */
- int max = (t == TYPE_B ? 3 : 2);
- if (type_count[t] == max)
- return i;
-
- type_count[t]++;
- }
- return split;
-}
/* Return the maximum number of instructions a cpu can issue. */
@@ -5563,208 +5600,21 @@ ia64_adjust_cost (insn, link, dep_insn, cost)
rtx insn, link, dep_insn;
int cost;
{
- enum attr_type dep_type;
enum attr_itanium_class dep_class;
enum attr_itanium_class insn_class;
- rtx dep_set, set, src, addr;
-
- if (GET_CODE (PATTERN (insn)) == CLOBBER
- || GET_CODE (PATTERN (insn)) == USE
- || GET_CODE (PATTERN (dep_insn)) == CLOBBER
- || GET_CODE (PATTERN (dep_insn)) == USE
- /* @@@ Not accurate for indirect calls. */
- || GET_CODE (insn) == CALL_INSN
- || ia64_safe_type (insn) == TYPE_S)
- return 0;
- if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT
- || REG_NOTE_KIND (link) == REG_DEP_ANTI)
- return 0;
+ if (REG_NOTE_KIND (link) != REG_DEP_OUTPUT)
+ return cost;
- dep_type = ia64_safe_type (dep_insn);
- dep_class = ia64_safe_itanium_class (dep_insn);
insn_class = ia64_safe_itanium_class (insn);
-
- /* Compares that feed a conditional branch can execute in the same
- cycle. */
- dep_set = ia64_single_set (dep_insn);
- set = ia64_single_set (insn);
-
- if (dep_type != TYPE_F
- && dep_set
- && GET_CODE (SET_DEST (dep_set)) == REG
- && PR_REG (REGNO (SET_DEST (dep_set)))
- && GET_CODE (insn) == JUMP_INSN)
+ dep_class = ia64_safe_itanium_class (dep_insn);
+ if (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF
+ || insn_class == ITANIUM_CLASS_ST || insn_class == ITANIUM_CLASS_STF)
return 0;
- if (dep_set && GET_CODE (SET_DEST (dep_set)) == MEM)
- {
- /* ??? Can't find any information in the documenation about whether
- a sequence
- st [rx] = ra
- ld rb = [ry]
- splits issue. Assume it doesn't. */
- return 0;
- }
-
- src = set ? SET_SRC (set) : 0;
- addr = 0;
- if (set)
- {
- if (GET_CODE (SET_DEST (set)) == MEM)
- addr = XEXP (SET_DEST (set), 0);
- else if (GET_CODE (SET_DEST (set)) == SUBREG
- && GET_CODE (SUBREG_REG (SET_DEST (set))) == MEM)
- addr = XEXP (SUBREG_REG (SET_DEST (set)), 0);
- else
- {
- addr = src;
- if (GET_CODE (addr) == UNSPEC && XVECLEN (addr, 0) > 0)
- addr = XVECEXP (addr, 0, 0);
- while (GET_CODE (addr) == SUBREG || GET_CODE (addr) == ZERO_EXTEND)
- addr = XEXP (addr, 0);
- if (GET_CODE (addr) == MEM)
- addr = XEXP (addr, 0);
- else
- addr = 0;
- }
- }
-
- if (addr && GET_CODE (addr) == POST_MODIFY)
- addr = XEXP (addr, 0);
-
- set = ia64_single_set (dep_insn);
-
- if ((dep_class == ITANIUM_CLASS_IALU
- || dep_class == ITANIUM_CLASS_ILOG
- || dep_class == ITANIUM_CLASS_LD)
- && (insn_class == ITANIUM_CLASS_LD
- || insn_class == ITANIUM_CLASS_ST))
- {
- if (! addr || ! set)
- abort ();
- /* This isn't completely correct - an IALU that feeds an address has
- a latency of 1 cycle if it's issued in an M slot, but 2 cycles
- otherwise. Unfortunately there's no good way to describe this. */
- if (reg_overlap_mentioned_p (SET_DEST (set), addr))
- return cost + 1;
- }
-
- if ((dep_class == ITANIUM_CLASS_IALU
- || dep_class == ITANIUM_CLASS_ILOG
- || dep_class == ITANIUM_CLASS_LD)
- && (insn_class == ITANIUM_CLASS_MMMUL
- || insn_class == ITANIUM_CLASS_MMSHF
- || insn_class == ITANIUM_CLASS_MMSHFI))
- return 3;
-
- if (dep_class == ITANIUM_CLASS_FMAC
- && (insn_class == ITANIUM_CLASS_FMISC
- || insn_class == ITANIUM_CLASS_FCVTFX
- || insn_class == ITANIUM_CLASS_XMPY))
- return 7;
-
- if ((dep_class == ITANIUM_CLASS_FMAC
- || dep_class == ITANIUM_CLASS_FMISC
- || dep_class == ITANIUM_CLASS_FCVTFX
- || dep_class == ITANIUM_CLASS_XMPY)
- && insn_class == ITANIUM_CLASS_STF)
- return 8;
-
- /* Intel docs say only LD, ST, IALU, ILOG, ISHF consumers have latency 4,
- but HP engineers say any non-MM operation. */
- if ((dep_class == ITANIUM_CLASS_MMMUL
- || dep_class == ITANIUM_CLASS_MMSHF
- || dep_class == ITANIUM_CLASS_MMSHFI)
- && insn_class != ITANIUM_CLASS_MMMUL
- && insn_class != ITANIUM_CLASS_MMSHF
- && insn_class != ITANIUM_CLASS_MMSHFI)
- return 4;
-
return cost;
}
-/* Describe the current state of the Itanium pipeline. */
-static struct
-{
- /* The first slot that is used in the current cycle. */
- int first_slot;
- /* The next slot to fill. */
- int cur;
- /* The packet we have selected for the current issue window. */
- const struct ia64_packet *packet;
- /* The position of the split issue that occurs due to issue width
- limitations (6 if there's no split issue). */
- int split;
- /* Record data about the insns scheduled so far in the same issue
- window. The elements up to but not including FIRST_SLOT belong
- to the previous cycle, the ones starting with FIRST_SLOT belong
- to the current cycle. */
- enum attr_type types[6];
- rtx insns[6];
- int stopbit[6];
- /* Nonzero if we decided to schedule a stop bit. */
- int last_was_stop;
-} sched_data;
-
-/* Temporary arrays; they have enough elements to hold all insns that
- can be ready at the same time while scheduling of the current block.
- SCHED_READY can hold ready insns, SCHED_TYPES their types. */
-static rtx *sched_ready;
-static enum attr_type *sched_types;
-
-/* Determine whether an insn INSN of type ITYPE can fit into slot SLOT
- of packet P. */
-
-static int
-insn_matches_slot (p, itype, slot, insn)
- const struct ia64_packet *p;
- enum attr_type itype;
- int slot;
- rtx insn;
-{
- enum attr_itanium_requires_unit0 u0;
- enum attr_type stype = p->t[slot];
-
- if (insn)
- {
- u0 = ia64_safe_itanium_requires_unit0 (insn);
- if (u0 == ITANIUM_REQUIRES_UNIT0_YES)
- {
- int i;
- for (i = sched_data.first_slot; i < slot; i++)
- if (p->t[i] == stype
- || (stype == TYPE_F && p->t[i] == TYPE_L)
- || (stype == TYPE_I && p->t[i] == TYPE_X))
- return 0;
- }
- if (GET_CODE (insn) == CALL_INSN)
- {
- /* Reject calls in multiway branch packets. We want to limit
- the number of multiway branches we generate (since the branch
- predictor is limited), and this seems to work fairly well.
- (If we didn't do this, we'd have to add another test here to
- force calls into the third slot of the bundle.) */
- if (slot < 3)
- {
- if (p->t[1] == TYPE_B)
- return 0;
- }
- else
- {
- if (p->t[4] == TYPE_B)
- return 0;
- }
- }
- }
-
- if (itype == stype)
- return 1;
- if (itype == TYPE_A)
- return stype == TYPE_M || stype == TYPE_I;
- return 0;
-}
-
/* Like emit_insn_before, but skip cycle_display notes.
??? When cycle display notes are implemented, update this. */
@@ -5775,1055 +5625,1324 @@ ia64_emit_insn_before (insn, before)
emit_insn_before (insn, before);
}
-/* When rotating a bundle out of the issue window, insert a bundle selector
- insn in front of it. DUMP is the scheduling dump file or NULL. START
- is either 0 or 3, depending on whether we want to emit a bundle selector
- for the first bundle or the second bundle in the current issue window.
-
- The selector insns are emitted this late because the selected packet can
- be changed until parts of it get rotated out. */
+/* The following function marks insns that produce addresses for load
+ and store insns. Such insns will be placed into M slots because that
+ decreases latency for Itanium1 (see function
+ `ia64_produce_address_p' and the DFA descriptions). */
static void
-finish_last_head (dump, start)
- FILE *dump;
- int start;
+ia64_dependencies_evaluation_hook (head, tail)
+ rtx head, tail;
{
- const struct ia64_packet *p = sched_data.packet;
- const struct bundle *b = start == 0 ? p->t1 : p->t2;
- int bundle_type = b - bundle;
- rtx insn;
- int i;
-
- if (! ia64_final_schedule)
- return;
-
- for (i = start; sched_data.insns[i] == 0; i++)
- if (i == start + 3)
- abort ();
- insn = sched_data.insns[i];
+ rtx insn, link, next, next_tail;
+
+ next_tail = NEXT_INSN (tail);
+ for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
+ if (INSN_P (insn))
+ insn->call = 0;
+ for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
+ if (INSN_P (insn)
+ && ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IALU)
+ {
+ for (link = INSN_DEPEND (insn); link != 0; link = XEXP (link, 1))
+ {
+ next = XEXP (link, 0);
+ if ((ia64_safe_itanium_class (next) == ITANIUM_CLASS_ST
+ || ia64_safe_itanium_class (next) == ITANIUM_CLASS_STF)
+ && ia64_st_address_bypass_p (insn, next))
+ break;
+ else if ((ia64_safe_itanium_class (next) == ITANIUM_CLASS_LD
+ || ia64_safe_itanium_class (next)
+ == ITANIUM_CLASS_FLD)
+ && ia64_ld_address_bypass_p (insn, next))
+ break;
+ }
+ insn->call = link != 0;
+ }
+}
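The hook above records, for each IALU insn, whether some consumer is a store or load that can take the corresponding address bypass, reusing the insn's `call' bit as the flag. A sketch of how the predicate named in the comment could read that flag back; `ia64_produce_address_p' itself is not part of this hunk, so this body is only an assumption for illustration:

static int
ia64_produce_address_p (insn)
     rtx insn;
{
  /* The dependencies evaluation hook above stored the "feeds an
     address used by a load/store" property in the otherwise unused
     `call' bit of the insn.  */
  return insn->call;
}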
- if (dump)
- fprintf (dump, "// Emitting template before %d: %s\n",
- INSN_UID (insn), b->name);
+/* We're beginning a new block. Initialize data structures as necessary. */
- ia64_emit_insn_before (gen_bundle_selector (GEN_INT (bundle_type)), insn);
+static void
+ia64_sched_init (dump, sched_verbose, max_ready)
+ FILE *dump ATTRIBUTE_UNUSED;
+ int sched_verbose ATTRIBUTE_UNUSED;
+ int max_ready ATTRIBUTE_UNUSED;
+{
+#ifdef ENABLE_CHECKING
+ rtx insn;
+
+ if (reload_completed)
+ for (insn = NEXT_INSN (current_sched_info->prev_head);
+ insn != current_sched_info->next_tail;
+ insn = NEXT_INSN (insn))
+ if (SCHED_GROUP_P (insn))
+ abort ();
+#endif
+ last_scheduled_insn = NULL_RTX;
+ init_insn_group_barriers ();
}
-/* We can't schedule more insns this cycle. Fix up the scheduling state
- and advance FIRST_SLOT and CUR.
- We have to distribute the insns that are currently found between
- FIRST_SLOT and CUR into the slots of the packet we have selected. So
- far, they are stored successively in the fields starting at FIRST_SLOT;
- now they must be moved to the correct slots.
- DUMP is the current scheduling dump file, or NULL. */
+/* We are about to begin issuing insns for this clock cycle.
+ Override the default sort algorithm to better slot instructions. */
-static void
-cycle_end_fill_slots (dump)
+static int
+ia64_dfa_sched_reorder (dump, sched_verbose, ready, pn_ready,
+ clock_var, reorder_type)
FILE *dump;
+ int sched_verbose;
+ rtx *ready;
+ int *pn_ready;
+ int clock_var ATTRIBUTE_UNUSED;
+ int reorder_type;
{
- const struct ia64_packet *packet = sched_data.packet;
- int slot, i;
- enum attr_type tmp_types[6];
- rtx tmp_insns[6];
+ int n_asms;
+ int n_ready = *pn_ready;
+ rtx *e_ready = ready + n_ready;
+ rtx *insnp;
- memcpy (tmp_types, sched_data.types, 6 * sizeof (enum attr_type));
- memcpy (tmp_insns, sched_data.insns, 6 * sizeof (rtx));
+ if (sched_verbose)
+ fprintf (dump, "// ia64_dfa_sched_reorder (type %d):\n", reorder_type);
- for (i = slot = sched_data.first_slot; i < sched_data.cur; i++)
+ if (reorder_type == 0)
{
- enum attr_type t = tmp_types[i];
- if (t != ia64_safe_type (tmp_insns[i]))
- abort ();
- while (! insn_matches_slot (packet, t, slot, tmp_insns[i]))
- {
- if (slot > sched_data.split)
- abort ();
- if (dump)
- fprintf (dump, "// Packet needs %s, have %s\n",
- type_names[packet->t[slot]], type_names[t]);
- sched_data.types[slot] = packet->t[slot];
- sched_data.insns[slot] = 0;
- sched_data.stopbit[slot] = 0;
-
- /* ??? TYPE_L instructions always fill up two slots, but we don't
- support TYPE_L nops. */
- if (packet->t[slot] == TYPE_L)
- abort ();
-
- slot++;
- }
-
- /* Do _not_ use T here. If T == TYPE_A, then we'd risk changing the
- actual slot type later. */
- sched_data.types[slot] = packet->t[slot];
- sched_data.insns[slot] = tmp_insns[i];
- sched_data.stopbit[slot] = 0;
- slot++;
+ /* First, move all USEs, CLOBBERs and other crud out of the way. */
+ n_asms = 0;
+ for (insnp = ready; insnp < e_ready; insnp++)
+ if (insnp < e_ready)
+ {
+ rtx insn = *insnp;
+ enum attr_type t = ia64_safe_type (insn);
+ if (t == TYPE_UNKNOWN)
+ {
+ if (GET_CODE (PATTERN (insn)) == ASM_INPUT
+ || asm_noperands (PATTERN (insn)) >= 0)
+ {
+ rtx lowest = ready[n_asms];
+ ready[n_asms] = insn;
+ *insnp = lowest;
+ n_asms++;
+ }
+ else
+ {
+ rtx highest = ready[n_ready - 1];
+ ready[n_ready - 1] = insn;
+ *insnp = highest;
+ return 1;
+ }
+ }
+ }
- /* TYPE_L instructions always fill up two slots. */
- if (t == TYPE_L)
+ if (n_asms < n_ready)
{
- sched_data.types[slot] = packet->t[slot];
- sched_data.insns[slot] = 0;
- sched_data.stopbit[slot] = 0;
- slot++;
+ /* Some normal insns to process. Skip the asms. */
+ ready += n_asms;
+ n_ready -= n_asms;
}
+ else if (n_ready > 0)
+ return 1;
}
- /* This isn't right - there's no need to pad out until the forced split;
- the CPU will automatically split if an insn isn't ready. */
-#if 0
- while (slot < sched_data.split)
+ if (ia64_final_schedule)
{
- sched_data.types[slot] = packet->t[slot];
- sched_data.insns[slot] = 0;
- sched_data.stopbit[slot] = 0;
- slot++;
+ int deleted = 0;
+ int nr_need_stop = 0;
+
+ for (insnp = ready; insnp < e_ready; insnp++)
+ if (safe_group_barrier_needed_p (*insnp))
+ nr_need_stop++;
+
+ if (reorder_type == 1 && n_ready == nr_need_stop)
+ return 0;
+ if (reorder_type == 0)
+ return 1;
+ insnp = e_ready;
+ /* Move down everything that needs a stop bit, preserving
+ relative order. */
+ while (insnp-- > ready + deleted)
+ while (insnp >= ready + deleted)
+ {
+ rtx insn = *insnp;
+ if (! safe_group_barrier_needed_p (insn))
+ break;
+ memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
+ *ready = insn;
+ deleted++;
+ }
+ n_ready -= deleted;
+ ready += deleted;
}
-#endif
- sched_data.first_slot = sched_data.cur = slot;
+ return 1;
}
-/* Bundle rotations, as described in the Itanium optimization manual.
- We can rotate either one or both bundles out of the issue window.
- DUMP is the current scheduling dump file, or NULL. */
-
-static void
-rotate_one_bundle (dump)
- FILE *dump;
-{
- if (dump)
- fprintf (dump, "// Rotating one bundle.\n");
-
- finish_last_head (dump, 0);
- if (sched_data.cur > 3)
- {
- sched_data.cur -= 3;
- sched_data.first_slot -= 3;
- memmove (sched_data.types,
- sched_data.types + 3,
- sched_data.cur * sizeof *sched_data.types);
- memmove (sched_data.stopbit,
- sched_data.stopbit + 3,
- sched_data.cur * sizeof *sched_data.stopbit);
- memmove (sched_data.insns,
- sched_data.insns + 3,
- sched_data.cur * sizeof *sched_data.insns);
- sched_data.packet
- = &packets[(sched_data.packet->t2 - bundle) * NR_BUNDLES];
- }
- else
- {
- sched_data.cur = 0;
- sched_data.first_slot = 0;
- }
-}
+/* We are about to begin issuing insns for this clock cycle. Override
+ the default sort algorithm to better slot instructions. */
-static void
-rotate_two_bundles (dump)
+static int
+ia64_sched_reorder (dump, sched_verbose, ready, pn_ready, clock_var)
FILE *dump;
+ int sched_verbose;
+ rtx *ready;
+ int *pn_ready;
+ int clock_var;
{
- if (dump)
- fprintf (dump, "// Rotating two bundles.\n");
-
- if (sched_data.cur == 0)
- return;
-
- finish_last_head (dump, 0);
- if (sched_data.cur > 3)
- finish_last_head (dump, 3);
- sched_data.cur = 0;
- sched_data.first_slot = 0;
+ return ia64_dfa_sched_reorder (dump, sched_verbose, ready,
+ pn_ready, clock_var, 0);
}
-/* We're beginning a new block. Initialize data structures as necessary. */
+/* Like ia64_sched_reorder, but called after issuing each insn.
+ Override the default sort algorithm to better slot instructions. */
-static void
-ia64_sched_init (dump, sched_verbose, max_ready)
+static int
+ia64_sched_reorder2 (dump, sched_verbose, ready, pn_ready, clock_var)
FILE *dump ATTRIBUTE_UNUSED;
int sched_verbose ATTRIBUTE_UNUSED;
- int max_ready;
+ rtx *ready;
+ int *pn_ready;
+ int clock_var;
{
- static int initialized = 0;
-
- if (! initialized)
- {
- int b1, b2, i;
-
- initialized = 1;
-
- for (i = b1 = 0; b1 < NR_BUNDLES; b1++)
- {
- const struct bundle *t1 = bundle + b1;
- for (b2 = 0; b2 < NR_BUNDLES; b2++, i++)
- {
- const struct bundle *t2 = bundle + b2;
-
- packets[i].t1 = t1;
- packets[i].t2 = t2;
- }
- }
- for (i = 0; i < NR_PACKETS; i++)
- {
- int j;
- for (j = 0; j < 3; j++)
- packets[i].t[j] = packets[i].t1->t[j];
- for (j = 0; j < 3; j++)
- packets[i].t[j + 3] = packets[i].t2->t[j];
- packets[i].first_split = itanium_split_issue (packets + i, 0);
- }
-
- }
-
- init_insn_group_barriers ();
-
- memset (&sched_data, 0, sizeof sched_data);
- sched_types = (enum attr_type *) xmalloc (max_ready
- * sizeof (enum attr_type));
- sched_ready = (rtx *) xmalloc (max_ready * sizeof (rtx));
+ if (ia64_tune == PROCESSOR_ITANIUM && reload_completed && last_scheduled_insn)
+ clocks [INSN_UID (last_scheduled_insn)] = clock_var;
+ return ia64_dfa_sched_reorder (dump, sched_verbose, ready, pn_ready,
+ clock_var, 1);
}
-/* See if the packet P can match the insns we have already scheduled. Return
- nonzero if so. In *PSLOT, we store the first slot that is available for
- more instructions if we choose this packet.
- SPLIT holds the last slot we can use, there's a split issue after it so
- scheduling beyond it would cause us to use more than one cycle. */
+/* We are about to issue INSN. Return the number of insns left on the
+ ready queue that can be issued this cycle. */
static int
-packet_matches_p (p, split, pslot)
- const struct ia64_packet *p;
- int split;
- int *pslot;
-{
- int filled = sched_data.cur;
- int first = sched_data.first_slot;
- int i, slot;
-
- /* First, check if the first of the two bundles must be a specific one (due
- to stop bits). */
- if (first > 0 && sched_data.stopbit[0] && p->t1->possible_stop != 1)
- return 0;
- if (first > 1 && sched_data.stopbit[1] && p->t1->possible_stop != 2)
- return 0;
-
- for (i = 0; i < first; i++)
- if (! insn_matches_slot (p, sched_data.types[i], i,
- sched_data.insns[i]))
- return 0;
- for (i = slot = first; i < filled; i++)
+ia64_variable_issue (dump, sched_verbose, insn, can_issue_more)
+ FILE *dump ATTRIBUTE_UNUSED;
+ int sched_verbose ATTRIBUTE_UNUSED;
+ rtx insn ATTRIBUTE_UNUSED;
+ int can_issue_more ATTRIBUTE_UNUSED;
+{
+ last_scheduled_insn = insn;
+ memcpy (prev_cycle_state, curr_state, dfa_state_size);
+ if (reload_completed)
{
- while (slot < split)
- {
- if (insn_matches_slot (p, sched_data.types[i], slot,
- sched_data.insns[i]))
- break;
- slot++;
- }
- if (slot == split)
- return 0;
- slot++;
+ if (group_barrier_needed_p (insn))
+ abort ();
+ if (GET_CODE (insn) == CALL_INSN)
+ init_insn_group_barriers ();
+ stops_p [INSN_UID (insn)] = stop_before_p;
+ stop_before_p = 0;
}
-
- if (pslot)
- *pslot = slot;
return 1;
}
-/* A frontend for itanium_split_issue. For a packet P and a slot
- number FIRST that describes the start of the current clock cycle,
- return the slot number of the first split issue. This function
- uses the cached number found in P if possible. */
+/* We are choosing insn from the ready queue. Return nonzero if INSN
+ can be chosen. */
static int
-get_split (p, first)
- const struct ia64_packet *p;
- int first;
+ia64_first_cycle_multipass_dfa_lookahead_guard (insn)
+ rtx insn;
{
- if (first == 0)
- return p->first_split;
- return itanium_split_issue (p, first);
+ if (insn == NULL_RTX || !INSN_P (insn))
+ abort ();
+ return (!reload_completed
+ || !safe_group_barrier_needed_p (insn));
}
-/* Given N_READY insns in the array READY, whose types are found in the
- corresponding array TYPES, return the insn that is best suited to be
- scheduled in slot SLOT of packet P. */
+/* The following variable value is pseudo-insn used by the DFA insn
+ scheduler to change the DFA state when the simulated clock is
+ increased. */
+
+static rtx dfa_pre_cycle_insn;
+
+/* We are about to begin issuing INSN. Return nonzero if we cannot
+ issue it on the given cycle CLOCK and return zero if we should not sort
+ the ready queue on the next clock start. */
static int
-find_best_insn (ready, types, n_ready, p, slot)
- rtx *ready;
- enum attr_type *types;
- int n_ready;
- const struct ia64_packet *p;
- int slot;
+ia64_dfa_new_cycle (dump, verbose, insn, last_clock, clock, sort_p)
+ FILE *dump;
+ int verbose;
+ rtx insn;
+ int last_clock, clock;
+ int *sort_p;
{
- int best = -1;
- int best_pri = 0;
- while (n_ready-- > 0)
+ int setup_clocks_p = FALSE;
+
+ if (insn == NULL_RTX || !INSN_P (insn))
+ abort ();
+ if ((reload_completed && safe_group_barrier_needed_p (insn))
+ || (last_scheduled_insn
+ && (GET_CODE (last_scheduled_insn) == CALL_INSN
+ || GET_CODE (PATTERN (last_scheduled_insn)) == ASM_INPUT
+ || asm_noperands (PATTERN (last_scheduled_insn)) >= 0)))
{
- rtx insn = ready[n_ready];
- if (! insn)
- continue;
- if (best >= 0 && INSN_PRIORITY (ready[n_ready]) < best_pri)
- break;
- /* If we have equally good insns, one of which has a stricter
- slot requirement, prefer the one with the stricter requirement. */
- if (best >= 0 && types[n_ready] == TYPE_A)
- continue;
- if (insn_matches_slot (p, types[n_ready], slot, insn))
+ init_insn_group_barriers ();
+ if (verbose && dump)
+ fprintf (dump, "// Stop should be before %d%s\n", INSN_UID (insn),
+ last_clock == clock ? " + cycle advance" : "");
+ stop_before_p = 1;
+ if (last_clock == clock)
{
- best = n_ready;
- best_pri = INSN_PRIORITY (ready[best]);
-
- /* If there's no way we could get a stricter requirement, stop
- looking now. */
- if (types[n_ready] != TYPE_A
- && ia64_safe_itanium_requires_unit0 (ready[n_ready]))
- break;
- break;
+ state_transition (curr_state, dfa_stop_insn);
+ if (TARGET_EARLY_STOP_BITS)
+ *sort_p = (last_scheduled_insn == NULL_RTX
+ || GET_CODE (last_scheduled_insn) != CALL_INSN);
+ else
+ *sort_p = 0;
+ return 1;
+ }
+ else if (reload_completed)
+ setup_clocks_p = TRUE;
+ memcpy (curr_state, prev_cycle_state, dfa_state_size);
+ state_transition (curr_state, dfa_stop_insn);
+ state_transition (curr_state, dfa_pre_cycle_insn);
+ state_transition (curr_state, NULL);
+ }
+ else if (reload_completed)
+ setup_clocks_p = TRUE;
+ if (setup_clocks_p && ia64_tune == PROCESSOR_ITANIUM)
+ {
+ enum attr_itanium_class c = ia64_safe_itanium_class (insn);
+
+ if (c != ITANIUM_CLASS_MMMUL && c != ITANIUM_CLASS_MMSHF)
+ {
+ rtx link;
+ int d = -1;
+
+ for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
+ if (REG_NOTE_KIND (link) == 0)
+ {
+ enum attr_itanium_class dep_class;
+ rtx dep_insn = XEXP (link, 0);
+
+ dep_class = ia64_safe_itanium_class (dep_insn);
+ if ((dep_class == ITANIUM_CLASS_MMMUL
+ || dep_class == ITANIUM_CLASS_MMSHF)
+ && last_clock - clocks [INSN_UID (dep_insn)] < 4
+ && (d < 0
+ || last_clock - clocks [INSN_UID (dep_insn)] < d))
+ d = last_clock - clocks [INSN_UID (dep_insn)];
+ }
+ if (d >= 0)
+ add_cycles [INSN_UID (insn)] = 3 - d;
}
}
- return best;
+ return 0;
}
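A worked instance of the clock bookkeeping above: suppose an MMSHF producer was issued at cycle 10 (so clocks[INSN_UID (producer)] == 10) and a dependent non-MM consumer is being considered at last_clock == 12. Then d = 12 - 10 = 2, which is within the window the code checks for, so add_cycles[INSN_UID (insn)] is set to 3 - d = 1, i.e. one extra cycle has to be accounted for that consumer later on; this is how the Itanium1 penalty for using an MM result too early is modelled.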
-/* Select the best packet to use given the current scheduler state and the
- current ready list.
- READY is an array holding N_READY ready insns; TYPES is a corresponding
- array that holds their types. Store the best packet in *PPACKET and the
- number of insns that can be scheduled in the current cycle in *PBEST. */
+
-static void
-find_best_packet (pbest, ppacket, ready, types, n_ready)
- int *pbest;
- const struct ia64_packet **ppacket;
- rtx *ready;
- enum attr_type *types;
- int n_ready;
-{
- int first = sched_data.first_slot;
- int best = 0;
- int lowest_end = 6;
- const struct ia64_packet *best_packet = NULL;
- int i;
+/* The following page contains abstract data `bundle states' which are
+ used for bundling insns (inserting nops and template generation). */
+
+/* The following describes state of insn bundling. */
+
+struct bundle_state
+{
+ /* Unique bundle state number to identify them in the debugging
+ output */
+ int unique_num;
+ rtx insn; /* corresponding insn, NULL for the 1st and the last state */
+ /* number of nops before and after the insn */
+ short before_nops_num, after_nops_num;
+ int insn_num; /* insn number (0 - for initial state, 1 - for the 1st
+ insn) */
+ int cost; /* cost of the state in cycles */
+ int accumulated_insns_num; /* number of all previous insns including
+ nops. L is considered as 2 insns */
+ int branch_deviation; /* deviation of previous branches from 3rd slots */
+ struct bundle_state *next; /* next state with the same insn_num */
+ struct bundle_state *originator; /* originator (previous insn state) */
+ /* All bundle states are in the following chain. */
+ struct bundle_state *allocated_states_chain;
+ /* The DFA State after issuing the insn and the nops. */
+ state_t dfa_state;
+};
- for (i = 0; i < NR_PACKETS; i++)
- {
- const struct ia64_packet *p = packets + i;
- int slot;
- int split = get_split (p, first);
- int win = 0;
- int first_slot, last_slot;
- int b_nops = 0;
+/* The following maps an insn number to the corresponding bundle state. */
- if (! packet_matches_p (p, split, &first_slot))
- continue;
+static struct bundle_state **index_to_bundle_states;
- memcpy (sched_ready, ready, n_ready * sizeof (rtx));
+/* The unique number of next bundle state. */
- win = 0;
- last_slot = 6;
- for (slot = first_slot; slot < split; slot++)
- {
- int insn_nr;
+static int bundle_states_num;
- /* Disallow a degenerate case where the first bundle doesn't
- contain anything but NOPs! */
- if (first_slot == 0 && win == 0 && slot == 3)
- {
- win = -1;
- break;
- }
+/* All allocated bundle states are in the following chain. */
- insn_nr = find_best_insn (sched_ready, types, n_ready, p, slot);
- if (insn_nr >= 0)
- {
- sched_ready[insn_nr] = 0;
- last_slot = slot;
- win++;
- }
- else if (p->t[slot] == TYPE_B)
- b_nops++;
- }
- /* We must disallow MBB/BBB packets if any of their B slots would be
- filled with nops. */
- if (last_slot < 3)
- {
- if (p->t[1] == TYPE_B && (b_nops || last_slot < 2))
- win = -1;
- }
- else
- {
- if (p->t[4] == TYPE_B && (b_nops || last_slot < 5))
- win = -1;
- }
+static struct bundle_state *allocated_bundle_states_chain;
- if (win > best
- || (win == best && last_slot < lowest_end))
- {
- best = win;
- lowest_end = last_slot;
- best_packet = p;
- }
- }
- *pbest = best;
- *ppacket = best_packet;
-}
+/* All allocated but not used bundle states are in the following
+ chain. */
-/* Reorder the ready list so that the insns that can be issued in this cycle
- are found in the correct order at the end of the list.
- DUMP is the scheduling dump file, or NULL. READY points to the start,
- E_READY to the end of the ready list. MAY_FAIL determines what should be
- done if no insns can be scheduled in this cycle: if it is zero, we abort,
- otherwise we return 0.
- Return 1 if any insns can be scheduled in this cycle. */
+static struct bundle_state *free_bundle_state_chain;
-static int
-itanium_reorder (dump, ready, e_ready, may_fail)
- FILE *dump;
- rtx *ready;
- rtx *e_ready;
- int may_fail;
-{
- const struct ia64_packet *best_packet;
- int n_ready = e_ready - ready;
- int first = sched_data.first_slot;
- int i, best, best_split, filled;
- for (i = 0; i < n_ready; i++)
- sched_types[i] = ia64_safe_type (ready[i]);
+/* The following function returns a free bundle state. */
- find_best_packet (&best, &best_packet, ready, sched_types, n_ready);
+static struct bundle_state *
+get_free_bundle_state ()
+{
+ struct bundle_state *result;
- if (best == 0)
+ if (free_bundle_state_chain != NULL)
{
- if (may_fail)
- return 0;
- abort ();
+ result = free_bundle_state_chain;
+ free_bundle_state_chain = result->next;
}
-
- if (dump)
+ else
{
- fprintf (dump, "// Selected bundles: %s %s (%d insns)\n",
- best_packet->t1->name,
- best_packet->t2 ? best_packet->t2->name : NULL, best);
+ result = xmalloc (sizeof (struct bundle_state));
+ result->dfa_state = xmalloc (dfa_state_size);
+ result->allocated_states_chain = allocated_bundle_states_chain;
+ allocated_bundle_states_chain = result;
}
+ result->unique_num = bundle_states_num++;
+ return result;
+
+}
- best_split = itanium_split_issue (best_packet, first);
- packet_matches_p (best_packet, best_split, &filled);
+/* The following function frees given bundle state. */
- for (i = filled; i < best_split; i++)
- {
- int insn_nr;
+static void
+free_bundle_state (state)
+ struct bundle_state *state;
+{
+ state->next = free_bundle_state_chain;
+ free_bundle_state_chain = state;
+}
- insn_nr = find_best_insn (ready, sched_types, n_ready, best_packet, i);
- if (insn_nr >= 0)
- {
- rtx insn = ready[insn_nr];
- memmove (ready + insn_nr, ready + insn_nr + 1,
- (n_ready - insn_nr - 1) * sizeof (rtx));
- memmove (sched_types + insn_nr, sched_types + insn_nr + 1,
- (n_ready - insn_nr - 1) * sizeof (enum attr_type));
- ready[--n_ready] = insn;
- }
- }
+/* Start work with abstract data `bundle states'. */
- sched_data.packet = best_packet;
- sched_data.split = best_split;
- return 1;
+static void
+initiate_bundle_states ()
+{
+ bundle_states_num = 0;
+ free_bundle_state_chain = NULL;
+ allocated_bundle_states_chain = NULL;
}
-/* Dump information about the current scheduling state to file DUMP. */
+/* Finish work with abstract data `bundle states'. */
static void
-dump_current_packet (dump)
- FILE *dump;
+finish_bundle_states ()
{
- int i;
- fprintf (dump, "// %d slots filled:", sched_data.cur);
- for (i = 0; i < sched_data.first_slot; i++)
- {
- rtx insn = sched_data.insns[i];
- fprintf (dump, " %s", type_names[sched_data.types[i]]);
- if (insn)
- fprintf (dump, "/%s", type_names[ia64_safe_type (insn)]);
- if (sched_data.stopbit[i])
- fprintf (dump, " ;;");
- }
- fprintf (dump, " :::");
- for (i = sched_data.first_slot; i < sched_data.cur; i++)
+ struct bundle_state *curr_state, *next_state;
+
+ for (curr_state = allocated_bundle_states_chain;
+ curr_state != NULL;
+ curr_state = next_state)
{
- rtx insn = sched_data.insns[i];
- enum attr_type t = ia64_safe_type (insn);
- fprintf (dump, " (%d) %s", INSN_UID (insn), type_names[t]);
+ next_state = curr_state->allocated_states_chain;
+ free (curr_state->dfa_state);
+ free (curr_state);
}
- fprintf (dump, "\n");
}
-/* Schedule a stop bit. DUMP is the current scheduling dump file, or
- NULL. */
+/* Hash table of the bundle states. The key is dfa_state and insn_num
+ of the bundle states. */
-static void
-schedule_stop (dump)
- FILE *dump;
-{
- const struct ia64_packet *best = sched_data.packet;
- int i;
- int best_stop = 6;
+static htab_t bundle_state_table;
- if (dump)
- fprintf (dump, "// Stop bit, cur = %d.\n", sched_data.cur);
+/* The function returns hash of BUNDLE_STATE. */
- if (sched_data.cur == 0)
- {
- if (dump)
- fprintf (dump, "// At start of bundle, so nothing to do.\n");
-
- rotate_two_bundles (NULL);
- return;
- }
+static unsigned
+bundle_state_hash (bundle_state)
+ const void *bundle_state;
+{
+ const struct bundle_state *state = (struct bundle_state *) bundle_state;
+ unsigned result, i;
- for (i = -1; i < NR_PACKETS; i++)
- {
- /* This is a slight hack to give the current packet the first chance.
- This is done to avoid e.g. switching from MIB to MBB bundles. */
- const struct ia64_packet *p = (i >= 0 ? packets + i : sched_data.packet);
- int split = get_split (p, sched_data.first_slot);
- const struct bundle *compare;
- int next, stoppos;
+ for (result = i = 0; i < dfa_state_size; i++)
+ result += (((unsigned char *) state->dfa_state) [i]
+ << ((i % CHAR_BIT) * 3 + CHAR_BIT));
+ return result + state->insn_num;
+}
- if (! packet_matches_p (p, split, &next))
- continue;
+/* The function returns nonzero if the bundle state keys are equal. */
- compare = next > 3 ? p->t2 : p->t1;
+static int
+bundle_state_eq_p (bundle_state_1, bundle_state_2)
+ const void *bundle_state_1;
+ const void *bundle_state_2;
+{
+ const struct bundle_state * state1 = (struct bundle_state *) bundle_state_1;
+ const struct bundle_state * state2 = (struct bundle_state *) bundle_state_2;
- stoppos = 3;
- if (compare->possible_stop)
- stoppos = compare->possible_stop;
- if (next > 3)
- stoppos += 3;
+ return (state1->insn_num == state2->insn_num
+ && memcmp (state1->dfa_state, state2->dfa_state,
+ dfa_state_size) == 0);
+}
- if (stoppos < next || stoppos >= best_stop)
- {
- if (compare->possible_stop == 0)
- continue;
- stoppos = (next > 3 ? 6 : 3);
- }
- if (stoppos < next || stoppos >= best_stop)
- continue;
+/* The function inserts the BUNDLE_STATE into the hash table. The
+ function returns nonzero if the bundle has been inserted into the
+ table. The table contains the best bundle state with given key. */
- if (dump)
- fprintf (dump, "// switching from %s %s to %s %s (stop at %d)\n",
- best->t1->name, best->t2->name, p->t1->name, p->t2->name,
- stoppos);
+static int
+insert_bundle_state (bundle_state)
+ struct bundle_state *bundle_state;
+{
+ void **entry_ptr;
- best_stop = stoppos;
- best = p;
+ entry_ptr = htab_find_slot (bundle_state_table, bundle_state, 1);
+ if (*entry_ptr == NULL)
+ {
+ bundle_state->next = index_to_bundle_states [bundle_state->insn_num];
+ index_to_bundle_states [bundle_state->insn_num] = bundle_state;
+ *entry_ptr = (void *) bundle_state;
+ return TRUE;
}
-
- sched_data.packet = best;
- cycle_end_fill_slots (dump);
- while (sched_data.cur < best_stop)
+ else if (bundle_state->cost < ((struct bundle_state *) *entry_ptr)->cost
+ || (bundle_state->cost == ((struct bundle_state *) *entry_ptr)->cost
+ && (((struct bundle_state *)*entry_ptr)->accumulated_insns_num
+ > bundle_state->accumulated_insns_num
+ || (((struct bundle_state *)
+ *entry_ptr)->accumulated_insns_num
+ == bundle_state->accumulated_insns_num
+ && ((struct bundle_state *)
+ *entry_ptr)->branch_deviation
+ > bundle_state->branch_deviation))))
+
{
- sched_data.types[sched_data.cur] = best->t[sched_data.cur];
- sched_data.insns[sched_data.cur] = 0;
- sched_data.stopbit[sched_data.cur] = 0;
- sched_data.cur++;
+ struct bundle_state temp;
+
+ temp = *(struct bundle_state *) *entry_ptr;
+ *(struct bundle_state *) *entry_ptr = *bundle_state;
+ ((struct bundle_state *) *entry_ptr)->next = temp.next;
+ *bundle_state = temp;
}
- sched_data.stopbit[sched_data.cur - 1] = 1;
- sched_data.first_slot = best_stop;
+ return FALSE;
+}
+
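The replacement test in insert_bundle_state above is a three-level lexicographic comparison. Written out as a standalone helper purely for clarity (the patch itself keeps the comparison inline; this function does not exist in the sources):

/* Return nonzero if state A is better than state B for the same
   (dfa_state, insn_num) key: lower cost first, then fewer issued
   insns (nops included), then smaller branch deviation.  */
static int
bundle_state_better_p (a, b)
     const struct bundle_state *a, *b;
{
  if (a->cost != b->cost)
    return a->cost < b->cost;
  if (a->accumulated_insns_num != b->accumulated_insns_num)
    return a->accumulated_insns_num < b->accumulated_insns_num;
  return a->branch_deviation < b->branch_deviation;
}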
+/* Start work with the hash table. */
- if (dump)
- dump_current_packet (dump);
+static void
+initiate_bundle_state_table ()
+{
+ bundle_state_table = htab_create (50, bundle_state_hash, bundle_state_eq_p,
+ (htab_del) 0);
}
-/* If necessary, perform one or two rotations on the scheduling state.
- This should only be called if we are starting a new cycle. */
+/* Finish work with the hash table. */
static void
-maybe_rotate (dump)
- FILE *dump;
+finish_bundle_state_table ()
{
- cycle_end_fill_slots (dump);
- if (sched_data.cur == 6)
- rotate_two_bundles (dump);
- else if (sched_data.cur >= 3)
- rotate_one_bundle (dump);
- sched_data.first_slot = sched_data.cur;
+ htab_delete (bundle_state_table);
}
-/* The clock cycle when ia64_sched_reorder was last called. */
-static int prev_cycle;
+
-/* The first insn scheduled in the previous cycle. This is the saved
- value of sched_data.first_slot. */
-static int prev_first;
+/* The following variable is an insn `nop' used to check bundle states
+ with different numbers of inserted nops. */
-/* Emit NOPs to fill the delay between PREV_CYCLE and CLOCK_VAR. Used to
- pad out the delay between MM (shifts, etc.) and integer operations. */
+static rtx ia64_nop;
-static void
-nop_cycles_until (clock_var, dump)
- int clock_var;
- FILE *dump;
+/* The following function tries to issue NOPS_NUM nops for the current
+ state without advancing the processor cycle. If it fails, the
+ function returns FALSE and frees the current state. */
+
+static int
+try_issue_nops (curr_state, nops_num)
+ struct bundle_state *curr_state;
+ int nops_num;
{
- int prev_clock = prev_cycle;
- int cycles_left = clock_var - prev_clock;
- bool did_stop = false;
+ int i;
- /* Finish the previous cycle; pad it out with NOPs. */
- if (sched_data.cur == 3)
- {
- sched_emit_insn (gen_insn_group_barrier (GEN_INT (3)));
- did_stop = true;
- maybe_rotate (dump);
- }
- else if (sched_data.cur > 0)
- {
- int need_stop = 0;
- int split = itanium_split_issue (sched_data.packet, prev_first);
+ for (i = 0; i < nops_num; i++)
+ if (state_transition (curr_state->dfa_state, ia64_nop) >= 0)
+ {
+ free_bundle_state (curr_state);
+ return FALSE;
+ }
+ return TRUE;
+}
- if (sched_data.cur < 3 && split > 3)
- {
- split = 3;
- need_stop = 1;
- }
+/* The following function tries to issue INSN for the current
+ state without advancing the processor cycle. If it fails, the
+ function returns FALSE and frees the current state. */
- if (split > sched_data.cur)
- {
- int i;
- for (i = sched_data.cur; i < split; i++)
- {
- rtx t = sched_emit_insn (gen_nop_type (sched_data.packet->t[i]));
- sched_data.types[i] = sched_data.packet->t[i];
- sched_data.insns[i] = t;
- sched_data.stopbit[i] = 0;
- }
- sched_data.cur = split;
- }
+static int
+try_issue_insn (curr_state, insn)
+ struct bundle_state *curr_state;
+ rtx insn;
+{
+ if (insn && state_transition (curr_state->dfa_state, insn) >= 0)
+ {
+ free_bundle_state (curr_state);
+ return FALSE;
+ }
+ return TRUE;
+}
- if (! need_stop && sched_data.cur > 0 && sched_data.cur < 6
- && cycles_left > 1)
- {
- int i;
- for (i = sched_data.cur; i < 6; i++)
- {
- rtx t = sched_emit_insn (gen_nop_type (sched_data.packet->t[i]));
- sched_data.types[i] = sched_data.packet->t[i];
- sched_data.insns[i] = t;
- sched_data.stopbit[i] = 0;
- }
- sched_data.cur = 6;
- cycles_left--;
- need_stop = 1;
- }
+/* The following function tries to issue BEFORE_NOPS_NUM nops and INSN
+ starting from ORIGINATOR without advancing the processor cycle. If
+ TRY_BUNDLE_END_P is TRUE, the function also tries to issue nops to
+ fill the whole bundle. If it is successful, the function creates a new
+ bundle state and inserts it into the hash table and into
+ `index_to_bundle_states'. */
- if (need_stop || sched_data.cur == 6)
+static void
+issue_nops_and_insn (originator, before_nops_num, insn, try_bundle_end_p)
+ struct bundle_state *originator;
+ int before_nops_num;
+ rtx insn;
+ int try_bundle_end_p;
+{
+ struct bundle_state *curr_state;
+
+ curr_state = get_free_bundle_state ();
+ memcpy (curr_state->dfa_state, originator->dfa_state, dfa_state_size);
+ curr_state->insn = insn;
+ curr_state->insn_num = originator->insn_num + 1;
+ curr_state->cost = originator->cost;
+ curr_state->originator = originator;
+ curr_state->before_nops_num = before_nops_num;
+ curr_state->after_nops_num = 0;
+ curr_state->accumulated_insns_num
+ = originator->accumulated_insns_num + before_nops_num;
+ curr_state->branch_deviation = originator->branch_deviation;
+ if (insn == NULL_RTX)
+ abort ();
+ else if (INSN_CODE (insn) == CODE_FOR_insn_group_barrier)
+ {
+ if (GET_MODE (insn) == TImode)
+ abort ();
+ if (!try_issue_nops (curr_state, before_nops_num))
+ return;
+ if (!try_issue_insn (curr_state, insn))
+ return;
+ memcpy (temp_dfa_state, curr_state->dfa_state, dfa_state_size);
+ if (state_transition (temp_dfa_state, dfa_pre_cycle_insn) >= 0
+ && curr_state->accumulated_insns_num % 3 != 0)
{
- sched_emit_insn (gen_insn_group_barrier (GEN_INT (3)));
- did_stop = true;
+ free_bundle_state (curr_state);
+ return;
}
- maybe_rotate (dump);
}
-
- cycles_left--;
- while (cycles_left > 0)
+ else if (GET_MODE (insn) != TImode)
{
- sched_emit_insn (gen_bundle_selector (GEN_INT (0)));
- sched_emit_insn (gen_nop_type (TYPE_M));
- sched_emit_insn (gen_nop_type (TYPE_I));
- if (cycles_left > 1)
+ if (!try_issue_nops (curr_state, before_nops_num))
+ return;
+ if (!try_issue_insn (curr_state, insn))
+ return;
+ if (GET_CODE (PATTERN (insn)) != ASM_INPUT
+ && asm_noperands (PATTERN (insn)) < 0)
+ curr_state->accumulated_insns_num++;
+ if (ia64_safe_type (insn) == TYPE_L)
+ curr_state->accumulated_insns_num++;
+ }
+ else
+ {
+ state_transition (curr_state->dfa_state, dfa_pre_cycle_insn);
+ state_transition (curr_state->dfa_state, NULL);
+ curr_state->cost++;
+ if (!try_issue_nops (curr_state, before_nops_num))
+ return;
+ if (!try_issue_insn (curr_state, insn))
+ return;
+ if (GET_CODE (PATTERN (insn)) != ASM_INPUT
+ && asm_noperands (PATTERN (insn)) < 0)
+ curr_state->accumulated_insns_num++;
+ if (ia64_safe_type (insn) == TYPE_L)
+ curr_state->accumulated_insns_num++;
+ }
+ if (ia64_safe_type (insn) == TYPE_B)
+ curr_state->branch_deviation
+ += 2 - (curr_state->accumulated_insns_num - 1) % 3;
+ if (try_bundle_end_p && curr_state->accumulated_insns_num % 3 != 0)
+ {
+ if (insert_bundle_state (curr_state))
{
- sched_emit_insn (gen_insn_group_barrier (GEN_INT (2)));
- cycles_left--;
+ state_t dfa_state;
+ struct bundle_state *curr_state1;
+ struct bundle_state *allocated_states_chain;
+
+ curr_state1 = get_free_bundle_state ();
+ dfa_state = curr_state1->dfa_state;
+ allocated_states_chain = curr_state1->allocated_states_chain;
+ *curr_state1 = *curr_state;
+ curr_state1->dfa_state = dfa_state;
+ curr_state1->allocated_states_chain = allocated_states_chain;
+ memcpy (curr_state1->dfa_state, curr_state->dfa_state,
+ dfa_state_size);
+ curr_state = curr_state1;
}
- sched_emit_insn (gen_nop_type (TYPE_I));
- sched_emit_insn (gen_insn_group_barrier (GEN_INT (3)));
- did_stop = true;
- cycles_left--;
+ if (!try_issue_nops (curr_state,
+ 3 - curr_state->accumulated_insns_num % 3))
+ return;
+ curr_state->after_nops_num
+ = 3 - curr_state->accumulated_insns_num % 3;
+ curr_state->accumulated_insns_num
+ += 3 - curr_state->accumulated_insns_num % 3;
}
+ if (!insert_bundle_state (curr_state))
+ free_bundle_state (curr_state);
+ return;
+}
+
+/* The following function returns the position in the two-bundle issue
+ window for the given STATE. */
- if (did_stop)
- init_insn_group_barriers ();
+static int
+get_max_pos (state)
+ state_t state;
+{
+ if (cpu_unit_reservation_p (state, pos_6))
+ return 6;
+ else if (cpu_unit_reservation_p (state, pos_5))
+ return 5;
+ else if (cpu_unit_reservation_p (state, pos_4))
+ return 4;
+ else if (cpu_unit_reservation_p (state, pos_3))
+ return 3;
+ else if (cpu_unit_reservation_p (state, pos_2))
+ return 2;
+ else if (cpu_unit_reservation_p (state, pos_1))
+ return 1;
+ else
+ return 0;
}
-/* We are about to being issuing insns for this clock cycle.
- Override the default sort algorithm to better slot instructions. */
+/* The function returns the code of a possible template for the given
+ position and state. The function should be called only with position
+ values of 3 or 6. */
static int
-ia64_internal_sched_reorder (dump, sched_verbose, ready, pn_ready,
- reorder_type, clock_var)
- FILE *dump ATTRIBUTE_UNUSED;
- int sched_verbose ATTRIBUTE_UNUSED;
- rtx *ready;
- int *pn_ready;
- int reorder_type, clock_var;
+get_template (state, pos)
+ state_t state;
+ int pos;
{
- int n_asms;
- int n_ready = *pn_ready;
- rtx *e_ready = ready + n_ready;
- rtx *insnp;
-
- if (sched_verbose)
+ switch (pos)
{
- fprintf (dump, "// ia64_sched_reorder (type %d):\n", reorder_type);
- dump_current_packet (dump);
+ case 3:
+ if (cpu_unit_reservation_p (state, _0mii_))
+ return 0;
+ else if (cpu_unit_reservation_p (state, _0mmi_))
+ return 1;
+ else if (cpu_unit_reservation_p (state, _0mfi_))
+ return 2;
+ else if (cpu_unit_reservation_p (state, _0mmf_))
+ return 3;
+ else if (cpu_unit_reservation_p (state, _0bbb_))
+ return 4;
+ else if (cpu_unit_reservation_p (state, _0mbb_))
+ return 5;
+ else if (cpu_unit_reservation_p (state, _0mib_))
+ return 6;
+ else if (cpu_unit_reservation_p (state, _0mmb_))
+ return 7;
+ else if (cpu_unit_reservation_p (state, _0mfb_))
+ return 8;
+ else if (cpu_unit_reservation_p (state, _0mlx_))
+ return 9;
+ else
+ abort ();
+ case 6:
+ if (cpu_unit_reservation_p (state, _1mii_))
+ return 0;
+ else if (cpu_unit_reservation_p (state, _1mmi_))
+ return 1;
+ else if (cpu_unit_reservation_p (state, _1mfi_))
+ return 2;
+ else if (_1mmf_ >= 0 && cpu_unit_reservation_p (state, _1mmf_))
+ return 3;
+ else if (cpu_unit_reservation_p (state, _1bbb_))
+ return 4;
+ else if (cpu_unit_reservation_p (state, _1mbb_))
+ return 5;
+ else if (cpu_unit_reservation_p (state, _1mib_))
+ return 6;
+ else if (cpu_unit_reservation_p (state, _1mmb_))
+ return 7;
+ else if (cpu_unit_reservation_p (state, _1mfb_))
+ return 8;
+ else if (cpu_unit_reservation_p (state, _1mlx_))
+ return 9;
+ else
+ abort ();
+ default:
+ abort ();
}
+}
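For orientation, the template codes 0-9 returned above appear to correspond to the bundle names used by the selectors emitted later in this patch (code 9 is rewritten to 2, i.e. MLX to MFI, and code 0 is used for the filler .MII bundles). A hedged sketch of that presumed mapping; the identifier below is illustrative only, the real table is defined elsewhere in ia64.c:

/* Assumed mapping from template codes to bundle names (sketch only,
   not part of this hunk).  */
static const char *const bundle_template_names[] =
{
  ".mii", ".mmi", ".mfi", ".mmf", ".bbb",
  ".mbb", ".mib", ".mmb", ".mfb", ".mlx"
};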
- /* Work around the pipeline flush that will occurr if the results of
- an MM instruction are accessed before the result is ready. Intel
- documentation says this only happens with IALU, ISHF, ILOG, LD,
- and ST consumers, but experimental evidence shows that *any* non-MM
- type instruction will incurr the flush. */
- if (reorder_type == 0 && clock_var > 0 && ia64_final_schedule)
- {
- for (insnp = ready; insnp < e_ready; insnp++)
- {
- rtx insn = *insnp, link;
- enum attr_itanium_class t = ia64_safe_itanium_class (insn);
-
- if (t == ITANIUM_CLASS_MMMUL
- || t == ITANIUM_CLASS_MMSHF
- || t == ITANIUM_CLASS_MMSHFI)
- continue;
+/* The following function returns the first insn important for insn
+ bundling, searching from INSN up to (but not including) TAIL. */
- for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
- if (REG_NOTE_KIND (link) == 0)
- {
- rtx other = XEXP (link, 0);
- enum attr_itanium_class t0 = ia64_safe_itanium_class (other);
- if (t0 == ITANIUM_CLASS_MMSHF || t0 == ITANIUM_CLASS_MMMUL)
- {
- nop_cycles_until (clock_var, sched_verbose ? dump : NULL);
- goto out;
- }
- }
- }
- }
- out:
+static rtx
+get_next_important_insn (insn, tail)
+ rtx insn, tail;
+{
+ for (; insn && insn != tail; insn = NEXT_INSN (insn))
+ if (INSN_P (insn)
+ && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
+ && GET_CODE (PATTERN (insn)) != USE
+ && GET_CODE (PATTERN (insn)) != CLOBBER)
+ return insn;
+ return NULL_RTX;
+}
- prev_first = sched_data.first_slot;
- prev_cycle = clock_var;
+/* The following function does insn bundling. The bundling algorithm is
+ based on dynamic programming. It tries inserting different numbers of
+ nop insns before/after the real insns. At the end of the EBB, it
+ chooses the best alternative and then, moving backwards through the
+ EBB, inserts templates for that alternative. The algorithm is driven
+ by information (changes of the simulated processor cycle) produced by
+ the 2nd insn scheduling pass. */
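A greatly simplified sketch of that dynamic programming, under the assumption that the only relevant state is the number of issue slots filled modulo 3 and the cost is the number of filler nops; the real pass additionally tracks DFA states, templates, stop bits and branch deviation, and the name below is illustrative only:

#include <limits.h>
#include <string.h>

/* Minimal number of filler nops needed to bundle N_INSNS insns when
   every bundle holds exactly three slots and each insn may be preceded
   by 0, 1 or 2 nops.  */
static int
min_filler_nops (int n_insns)
{
  int cost[3] = { 0, INT_MAX, INT_MAX };  /* cost indexed by slots % 3 */
  int next[3];
  int i, rem, nops, best;

  for (i = 0; i < n_insns; i++)
    {
      next[0] = next[1] = next[2] = INT_MAX;
      for (rem = 0; rem < 3; rem++)
        if (cost[rem] != INT_MAX)
          for (nops = 0; nops <= 2; nops++)
            {
              int new_rem = (rem + nops + 1) % 3;
              if (cost[rem] + nops < next[new_rem])
                next[new_rem] = cost[rem] + nops;
            }
      memcpy (cost, next, sizeof cost);
    }

  /* The last bundle must be completed, possibly with trailing nops.  */
  best = cost[0];
  if (cost[2] != INT_MAX && cost[2] + 1 < best)
    best = cost[2] + 1;
  if (cost[1] != INT_MAX && cost[1] + 2 < best)
    best = cost[1] + 2;
  return best;
}

With this toy model the answer degenerates to padding the insn count up to a multiple of 3; the value of the real pass lies in the DFA and template constraints that make some of these transitions illegal or more costly.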
- if (reorder_type == 0)
- maybe_rotate (sched_verbose ? dump : NULL);
+static void
+bundling (dump, verbose, prev_head_insn, tail)
+ FILE *dump;
+ int verbose;
+ rtx prev_head_insn, tail;
+{
+ struct bundle_state *curr_state, *next_state, *best_state;
+ rtx insn, next_insn;
+ int insn_num;
+ int i, bundle_end_p;
+ int pos, max_pos, template0, template1;
+ rtx b;
+ rtx nop;
+ enum attr_type type;
- /* First, move all USEs, CLOBBERs and other crud out of the way. */
- n_asms = 0;
- for (insnp = ready; insnp < e_ready; insnp++)
- if (insnp < e_ready)
+ insn_num = 0;
+ for (insn = NEXT_INSN (prev_head_insn);
+ insn && insn != tail;
+ insn = NEXT_INSN (insn))
+ if (INSN_P (insn))
+ insn_num++;
+ if (insn_num == 0)
+ return;
+ bundling_p = 1;
+ dfa_clean_insn_cache ();
+ initiate_bundle_state_table ();
+ index_to_bundle_states = xmalloc ((insn_num + 2)
+ * sizeof (struct bundle_state *));
+ /* First (forward) pass -- generates states. */
+ curr_state = get_free_bundle_state ();
+ curr_state->insn = NULL;
+ curr_state->before_nops_num = 0;
+ curr_state->after_nops_num = 0;
+ curr_state->insn_num = 0;
+ curr_state->cost = 0;
+ curr_state->accumulated_insns_num = 0;
+ curr_state->branch_deviation = 0;
+ curr_state->next = NULL;
+ curr_state->originator = NULL;
+ state_reset (curr_state->dfa_state);
+ index_to_bundle_states [0] = curr_state;
+ insn_num = 0;
+ for (insn = NEXT_INSN (prev_head_insn);
+ insn != tail;
+ insn = NEXT_INSN (insn))
+ if (INSN_P (insn)
+ && (ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IGNORE
+ || GET_CODE (PATTERN (insn)) == USE
+ || GET_CODE (PATTERN (insn)) == CLOBBER)
+ && GET_MODE (insn) == TImode)
{
- rtx insn = *insnp;
- enum attr_type t = ia64_safe_type (insn);
- if (t == TYPE_UNKNOWN)
- {
- if (GET_CODE (PATTERN (insn)) == ASM_INPUT
- || asm_noperands (PATTERN (insn)) >= 0)
- {
- rtx lowest = ready[n_asms];
- ready[n_asms] = insn;
- *insnp = lowest;
- n_asms++;
- }
- else
- {
- rtx highest = ready[n_ready - 1];
- ready[n_ready - 1] = insn;
- *insnp = highest;
- if (ia64_final_schedule && group_barrier_needed_p (insn))
- {
- schedule_stop (sched_verbose ? dump : NULL);
- sched_data.last_was_stop = 1;
- maybe_rotate (sched_verbose ? dump : NULL);
- }
-
- return 1;
- }
- }
+ PUT_MODE (insn, VOIDmode);
+ for (next_insn = NEXT_INSN (insn);
+ next_insn != tail;
+ next_insn = NEXT_INSN (next_insn))
+ if (INSN_P (next_insn)
+ && ia64_safe_itanium_class (next_insn) != ITANIUM_CLASS_IGNORE
+ && GET_CODE (PATTERN (next_insn)) != USE
+ && GET_CODE (PATTERN (next_insn)) != CLOBBER)
+ {
+ PUT_MODE (next_insn, TImode);
+ break;
+ }
}
- if (n_asms < n_ready)
+ for (insn = get_next_important_insn (NEXT_INSN (prev_head_insn), tail);
+ insn != NULL_RTX;
+ insn = next_insn)
{
- /* Some normal insns to process. Skip the asms. */
- ready += n_asms;
- n_ready -= n_asms;
- }
- else if (n_ready > 0)
- {
- /* Only asm insns left. */
- if (ia64_final_schedule && group_barrier_needed_p (ready[n_ready - 1]))
+ if (!INSN_P (insn)
+ || ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IGNORE
+ || GET_CODE (PATTERN (insn)) == USE
+ || GET_CODE (PATTERN (insn)) == CLOBBER)
+ abort ();
+ next_insn = get_next_important_insn (NEXT_INSN (insn), tail);
+ insn_num++;
+ index_to_bundle_states [insn_num] = NULL;
+ for (curr_state = index_to_bundle_states [insn_num - 1];
+ curr_state != NULL;
+ curr_state = next_state)
{
- schedule_stop (sched_verbose ? dump : NULL);
- sched_data.last_was_stop = 1;
- maybe_rotate (sched_verbose ? dump : NULL);
+ pos = curr_state->accumulated_insns_num % 3;
+ type = ia64_safe_type (insn);
+ next_state = curr_state->next;
+ bundle_end_p
+ = (next_insn == NULL_RTX
+ || (GET_MODE (next_insn) == TImode
+ && INSN_CODE (insn) != CODE_FOR_insn_group_barrier));
+ if (type == TYPE_F || type == TYPE_B || type == TYPE_L
+ || type == TYPE_S
+ /* We need to insert 2 Nops for cases like M_MII. */
+ || (type == TYPE_M && ia64_tune == PROCESSOR_ITANIUM
+ && !bundle_end_p && pos == 1))
+ issue_nops_and_insn (curr_state, 2, insn, bundle_end_p);
+ issue_nops_and_insn (curr_state, 1, insn, bundle_end_p);
+ issue_nops_and_insn (curr_state, 0, insn, bundle_end_p);
}
- cycle_end_fill_slots (sched_verbose ? dump : NULL);
- return 1;
+ if (index_to_bundle_states [insn_num] == NULL)
+ abort ();
+ for (curr_state = index_to_bundle_states [insn_num];
+ curr_state != NULL;
+ curr_state = curr_state->next)
+ if (verbose >= 2 && dump)
+ {
+ struct DFA_chip
+ {
+ unsigned short one_automaton_state;
+ unsigned short oneb_automaton_state;
+ unsigned short two_automaton_state;
+ unsigned short twob_automaton_state;
+ };
+
+ fprintf
+ (dump,
+ "// Bundle state %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, state %d) for %d\n",
+ curr_state->unique_num,
+ (curr_state->originator == NULL
+ ? -1 : curr_state->originator->unique_num),
+ curr_state->cost,
+ curr_state->before_nops_num, curr_state->after_nops_num,
+ curr_state->accumulated_insns_num, curr_state->branch_deviation,
+ (ia64_tune == PROCESSOR_ITANIUM
+ ? ((struct DFA_chip *) curr_state->dfa_state)->oneb_automaton_state
+ : ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state),
+ INSN_UID (insn));
+ }
}
-
- if (ia64_final_schedule)
- {
- int nr_need_stop = 0;
-
- for (insnp = ready; insnp < e_ready; insnp++)
- if (safe_group_barrier_needed_p (*insnp))
- nr_need_stop++;
-
- /* Schedule a stop bit if
- - all insns require a stop bit, or
- - we are starting a new cycle and _any_ insns require a stop bit.
- The reason for the latter is that if our schedule is accurate, then
- the additional stop won't decrease performance at this point (since
- there's a split issue at this point anyway), but it gives us more
- freedom when scheduling the currently ready insns. */
- if ((reorder_type == 0 && nr_need_stop)
- || (reorder_type == 1 && n_ready == nr_need_stop))
+ if (index_to_bundle_states [insn_num] == NULL)
+ abort ();
+ /* Finding state with a minimal cost: */
+ best_state = NULL;
+ for (curr_state = index_to_bundle_states [insn_num];
+ curr_state != NULL;
+ curr_state = curr_state->next)
+ if (curr_state->accumulated_insns_num % 3 == 0
+ && (best_state == NULL || best_state->cost > curr_state->cost
+ || (best_state->cost == curr_state->cost
+ && (curr_state->accumulated_insns_num
+ < best_state->accumulated_insns_num
+ || (curr_state->accumulated_insns_num
+ == best_state->accumulated_insns_num
+ && curr_state->branch_deviation
+ < best_state->branch_deviation)))))
+ best_state = curr_state;
+ /* Second (backward) pass: adding nops and templates: */
+ insn_num = best_state->before_nops_num;
+ template0 = template1 = -1;
+ for (curr_state = best_state;
+ curr_state->originator != NULL;
+ curr_state = curr_state->originator)
+ {
+ insn = curr_state->insn;
+ insn_num++;
+ if (verbose >= 2 && dump)
{
- schedule_stop (sched_verbose ? dump : NULL);
- sched_data.last_was_stop = 1;
- maybe_rotate (sched_verbose ? dump : NULL);
- if (reorder_type == 1)
- return 0;
+ struct DFA_chip
+ {
+ unsigned short one_automaton_state;
+ unsigned short oneb_automaton_state;
+ unsigned short two_automaton_state;
+ unsigned short twob_automaton_state;
+ };
+
+ fprintf
+ (dump,
+ "// Best %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, state %d) for %d\n",
+ curr_state->unique_num,
+ (curr_state->originator == NULL
+ ? -1 : curr_state->originator->unique_num),
+ curr_state->cost,
+ curr_state->before_nops_num, curr_state->after_nops_num,
+ curr_state->accumulated_insns_num, curr_state->branch_deviation,
+ (ia64_tune == PROCESSOR_ITANIUM
+ ? ((struct DFA_chip *) curr_state->dfa_state)->oneb_automaton_state
+ : ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state),
+ INSN_UID (insn));
}
- else
+ max_pos = get_max_pos (curr_state->dfa_state);
+ if (max_pos == 6 || (max_pos == 3 && template0 < 0))
{
- int deleted = 0;
- insnp = e_ready;
- /* Move down everything that needs a stop bit, preserving relative
- order. */
- while (insnp-- > ready + deleted)
- while (insnp >= ready + deleted)
- {
- rtx insn = *insnp;
- if (! safe_group_barrier_needed_p (insn))
- break;
- memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
- *ready = insn;
- deleted++;
- }
- n_ready -= deleted;
- ready += deleted;
- if (deleted != nr_need_stop)
+ pos = max_pos;
+ if (max_pos == 3)
+ template0 = get_template (curr_state->dfa_state, 3);
+ else
+ {
+ template1 = get_template (curr_state->dfa_state, 3);
+ template0 = get_template (curr_state->dfa_state, 6);
+ }
+ }
+ if (max_pos > 3 && template1 < 0)
+ {
+ if (pos > 3)
abort ();
+ template1 = get_template (curr_state->dfa_state, 3);
+ pos += 3;
+ }
+ for (i = 0; i < curr_state->after_nops_num; i++)
+ {
+ nop = gen_nop ();
+ emit_insn_after (nop, insn);
+ pos--;
+ if (pos < 0)
+ abort ();
+ if (pos % 3 == 0)
+ {
+ if (template0 < 0)
+ abort ();
+ b = gen_bundle_selector (GEN_INT (template0));
+ ia64_emit_insn_before (b, nop);
+ template0 = template1;
+ template1 = -1;
+ }
+ }
+ if (INSN_CODE (insn) != CODE_FOR_insn_group_barrier
+ && GET_CODE (PATTERN (insn)) != ASM_INPUT
+ && asm_noperands (PATTERN (insn)) < 0)
+ pos--;
+ if (ia64_safe_type (insn) == TYPE_L)
+ pos--;
+ if (pos < 0)
+ abort ();
+ if (pos % 3 == 0
+ && INSN_CODE (insn) != CODE_FOR_insn_group_barrier
+ && GET_CODE (PATTERN (insn)) != ASM_INPUT
+ && asm_noperands (PATTERN (insn)) < 0)
+ {
+ if (template0 < 0)
+ abort ();
+ b = gen_bundle_selector (GEN_INT (template0));
+ ia64_emit_insn_before (b, insn);
+ b = PREV_INSN (insn);
+ insn = b;
+ template0 = template1;
+ template1 = -1;
+ }
+ for (i = 0; i < curr_state->before_nops_num; i++)
+ {
+ nop = gen_nop ();
+ ia64_emit_insn_before (nop, insn);
+ nop = PREV_INSN (insn);
+ insn = nop;
+ pos--;
+ if (pos < 0)
+ abort ();
+ if (pos % 3 == 0)
+ {
+ if (template0 < 0)
+ abort ();
+ b = gen_bundle_selector (GEN_INT (template0));
+ ia64_emit_insn_before (b, insn);
+ b = PREV_INSN (insn);
+ insn = b;
+ template0 = template1;
+ template1 = -1;
+ }
}
}
-
- return itanium_reorder (sched_verbose ? dump : NULL,
- ready, e_ready, reorder_type == 1);
+ if (ia64_tune == PROCESSOR_ITANIUM)
+ /* Insert additional cycles for MM-insns: */
+ for (insn = get_next_important_insn (NEXT_INSN (prev_head_insn), tail);
+ insn != NULL_RTX;
+ insn = next_insn)
+ {
+ if (!INSN_P (insn)
+ || ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IGNORE
+ || GET_CODE (PATTERN (insn)) == USE
+ || GET_CODE (PATTERN (insn)) == CLOBBER)
+ abort ();
+ next_insn = get_next_important_insn (NEXT_INSN (insn), tail);
+ if (INSN_UID (insn) < clocks_length && add_cycles [INSN_UID (insn)])
+ {
+ rtx last;
+ int i, j, n;
+ int pred_stop_p;
+
+ last = prev_active_insn (insn);
+ pred_stop_p = recog_memoized (last) == CODE_FOR_insn_group_barrier;
+ if (pred_stop_p)
+ last = prev_active_insn (last);
+ n = 0;
+ for (;; last = prev_active_insn (last))
+ if (recog_memoized (last) == CODE_FOR_bundle_selector)
+ {
+ template0 = XINT (XVECEXP (PATTERN (last), 0, 0), 0);
+ if (template0 == 9)
+ PATTERN (last)
+ = gen_bundle_selector (GEN_INT (2)); /* -> MFI */
+ break;
+ }
+ else if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
+ n++;
+ if ((pred_stop_p && n == 0) || n > 2
+ || (template0 == 9 && n != 0))
+ abort ();
+ for (j = 3 - n; j > 0; j --)
+ ia64_emit_insn_before (gen_nop (), insn);
+ add_cycles [INSN_UID (insn)]--;
+ if (!pred_stop_p || add_cycles [INSN_UID (insn)])
+ ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
+ insn);
+ if (pred_stop_p)
+ add_cycles [INSN_UID (insn)]--;
+ for (i = add_cycles [INSN_UID (insn)]; i > 0; i--)
+ {
+ /* Insert .MII bundle. */
+ ia64_emit_insn_before (gen_bundle_selector (GEN_INT (0)),
+ insn);
+ ia64_emit_insn_before (gen_nop (), insn);
+ ia64_emit_insn_before (gen_nop (), insn);
+ if (i > 1)
+ {
+ ia64_emit_insn_before
+ (gen_insn_group_barrier (GEN_INT (3)), insn);
+ i--;
+ }
+ ia64_emit_insn_before (gen_nop (), insn);
+ ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
+ insn);
+ }
+ ia64_emit_insn_before (gen_bundle_selector (GEN_INT (template0)),
+ insn);
+ for (j = n; j > 0; j --)
+ ia64_emit_insn_before (gen_nop (), insn);
+ if (pred_stop_p)
+ ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
+ insn);
+ }
+ }
+ free (index_to_bundle_states);
+ finish_bundle_state_table ();
+ bundling_p = 0;
+ dfa_clean_insn_cache ();
}
-static int
-ia64_sched_reorder (dump, sched_verbose, ready, pn_ready, clock_var)
+/* The following function is called at the end of scheduling a BB or
+ an EBB. After reload, it inserts stop bits and does insn bundling. */
+
+static void
+ia64_sched_finish (dump, sched_verbose)
FILE *dump;
int sched_verbose;
- rtx *ready;
- int *pn_ready;
- int clock_var;
{
- return ia64_internal_sched_reorder (dump, sched_verbose, ready,
- pn_ready, 0, clock_var);
+ if (sched_verbose)
+ fprintf (dump, "// Finishing schedule.\n");
+ if (!reload_completed)
+ return;
+ if (reload_completed)
+ {
+ final_emit_insn_group_barriers (dump);
+ bundling (dump, sched_verbose, current_sched_info->prev_head,
+ current_sched_info->next_tail);
+ if (sched_verbose && dump)
+ fprintf (dump, "// finishing %d-%d\n",
+ INSN_UID (NEXT_INSN (current_sched_info->prev_head)),
+ INSN_UID (PREV_INSN (current_sched_info->next_tail)));
+
+ return;
+ }
}
-/* Like ia64_sched_reorder, but called after issuing each insn.
- Override the default sort algorithm to better slot instructions. */
+/* The following function inserts stop bits in a scheduled BB or EBB. */
-static int
-ia64_sched_reorder2 (dump, sched_verbose, ready, pn_ready, clock_var)
+static void
+final_emit_insn_group_barriers (dump)
FILE *dump ATTRIBUTE_UNUSED;
- int sched_verbose ATTRIBUTE_UNUSED;
- rtx *ready;
- int *pn_ready;
- int clock_var;
{
- if (sched_data.last_was_stop)
- return 0;
+ rtx insn;
+ int need_barrier_p = 0;
+ rtx prev_insn = NULL_RTX;
- /* Detect one special case and try to optimize it.
- If we have 1.M;;MI 2.MIx, and slots 2.1 (M) and 2.2 (I) are both NOPs,
- then we can get better code by transforming this to 1.MFB;; 2.MIx. */
- if (sched_data.first_slot == 1
- && sched_data.stopbit[0]
- && ((sched_data.cur == 4
- && (sched_data.types[1] == TYPE_M || sched_data.types[1] == TYPE_A)
- && (sched_data.types[2] == TYPE_I || sched_data.types[2] == TYPE_A)
- && (sched_data.types[3] != TYPE_M && sched_data.types[3] != TYPE_A))
- || (sched_data.cur == 3
- && (sched_data.types[1] == TYPE_M
- || sched_data.types[1] == TYPE_A)
- && (sched_data.types[2] != TYPE_M
- && sched_data.types[2] != TYPE_I
- && sched_data.types[2] != TYPE_A))))
-
- {
- int i, best;
- rtx stop = sched_data.insns[1];
+ init_insn_group_barriers ();
- /* Search backward for the stop bit that must be there. */
- while (1)
+ for (insn = NEXT_INSN (current_sched_info->prev_head);
+ insn != current_sched_info->next_tail;
+ insn = NEXT_INSN (insn))
+ {
+ if (GET_CODE (insn) == BARRIER)
{
- int insn_code;
-
- stop = PREV_INSN (stop);
- if (GET_CODE (stop) != INSN)
- abort ();
- insn_code = recog_memoized (stop);
-
- /* Ignore .pred.rel.mutex.
+ rtx last = prev_active_insn (insn);
- ??? Update this to ignore cycle display notes too
- ??? once those are implemented */
- if (insn_code == CODE_FOR_pred_rel_mutex
- || insn_code == CODE_FOR_prologue_use)
+ if (! last)
continue;
+ if (GET_CODE (last) == JUMP_INSN
+ && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
+ last = prev_active_insn (last);
+ if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
+ emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
- if (insn_code == CODE_FOR_insn_group_barrier)
- break;
- abort ();
+ init_insn_group_barriers ();
+ need_barrier_p = 0;
+ prev_insn = NULL_RTX;
}
-
- /* Adjust the stop bit's slot selector. */
- if (INTVAL (XVECEXP (PATTERN (stop), 0, 0)) != 1)
- abort ();
- XVECEXP (PATTERN (stop), 0, 0) = GEN_INT (3);
-
- sched_data.stopbit[0] = 0;
- sched_data.stopbit[2] = 1;
-
- sched_data.types[5] = sched_data.types[3];
- sched_data.types[4] = sched_data.types[2];
- sched_data.types[3] = sched_data.types[1];
- sched_data.insns[5] = sched_data.insns[3];
- sched_data.insns[4] = sched_data.insns[2];
- sched_data.insns[3] = sched_data.insns[1];
- sched_data.stopbit[5] = sched_data.stopbit[4] = sched_data.stopbit[3] = 0;
- sched_data.cur += 2;
- sched_data.first_slot = 3;
- for (i = 0; i < NR_PACKETS; i++)
+ else if (INSN_P (insn))
{
- const struct ia64_packet *p = packets + i;
- if (p->t[0] == TYPE_M && p->t[1] == TYPE_F && p->t[2] == TYPE_B)
+ if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
{
- sched_data.packet = p;
- break;
+ init_insn_group_barriers ();
+ need_barrier_p = 0;
+ prev_insn = NULL_RTX;
}
- }
- rotate_one_bundle (sched_verbose ? dump : NULL);
-
- best = 6;
- for (i = 0; i < NR_PACKETS; i++)
- {
- const struct ia64_packet *p = packets + i;
- int split = get_split (p, sched_data.first_slot);
- int next;
-
- /* Disallow multiway branches here. */
- if (p->t[1] == TYPE_B)
- continue;
-
- if (packet_matches_p (p, split, &next) && next < best)
+ else if (need_barrier_p || group_barrier_needed_p (insn))
{
- best = next;
- sched_data.packet = p;
- sched_data.split = split;
+ if (TARGET_EARLY_STOP_BITS)
+ {
+ rtx last;
+
+ for (last = insn;
+ last != current_sched_info->prev_head;
+ last = PREV_INSN (last))
+ if (INSN_P (last) && GET_MODE (last) == TImode
+ && stops_p [INSN_UID (last)])
+ break;
+ if (last == current_sched_info->prev_head)
+ last = insn;
+ last = prev_active_insn (last);
+ if (last
+ && recog_memoized (last) != CODE_FOR_insn_group_barrier)
+ emit_insn_after (gen_insn_group_barrier (GEN_INT (3)),
+ last);
+ init_insn_group_barriers ();
+ for (last = NEXT_INSN (last);
+ last != insn;
+ last = NEXT_INSN (last))
+ if (INSN_P (last))
+ group_barrier_needed_p (last);
+ }
+ else
+ {
+ emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
+ insn);
+ init_insn_group_barriers ();
+ }
+ group_barrier_needed_p (insn);
+ prev_insn = NULL_RTX;
}
+ else if (recog_memoized (insn) >= 0)
+ prev_insn = insn;
+ need_barrier_p = (GET_CODE (insn) == CALL_INSN
+ || GET_CODE (PATTERN (insn)) == ASM_INPUT
+ || asm_noperands (PATTERN (insn)) >= 0);
}
- if (best == 6)
- abort ();
}
+}
- if (*pn_ready > 0)
- {
- int more = ia64_internal_sched_reorder (dump, sched_verbose,
- ready, pn_ready, 1,
- clock_var);
- if (more)
- return more;
- /* Did we schedule a stop? If so, finish this cycle. */
- if (sched_data.cur == sched_data.first_slot)
- return 0;
- }
+
- if (sched_verbose)
- fprintf (dump, "// Can't issue more this cycle; updating type array.\n");
+/* If the following function returns TRUE, we will use the DFA
+ insn scheduler. */
- cycle_end_fill_slots (sched_verbose ? dump : NULL);
- if (sched_verbose)
- dump_current_packet (dump);
- return 0;
+static int
+ia64_use_dfa_pipeline_interface ()
+{
+ return 1;
}
-/* We are about to issue INSN. Return the number of insns left on the
- ready queue that can be issued this cycle. */
+/* The following function returns the lookahead depth used by the
+ multipass variant of the DFA insn scheduler on the first cycle. */
static int
-ia64_variable_issue (dump, sched_verbose, insn, can_issue_more)
- FILE *dump;
- int sched_verbose;
- rtx insn;
- int can_issue_more ATTRIBUTE_UNUSED;
+ia64_first_cycle_multipass_dfa_lookahead ()
{
- enum attr_type t = ia64_safe_type (insn);
+ return (reload_completed ? 6 : 4);
+}
- if (sched_data.last_was_stop)
- {
- int t = sched_data.first_slot;
- if (t == 0)
- t = 3;
- ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (t)), insn);
- init_insn_group_barriers ();
- sched_data.last_was_stop = 0;
- }
+/* The following function initializes the variables `dfa_pre_cycle_insn'
+ and `dfa_stop_insn' and sets up the DFA state buffers. */
- if (t == TYPE_UNKNOWN)
+static void
+ia64_init_dfa_pre_cycle_insn ()
+{
+ if (temp_dfa_state == NULL)
{
- if (sched_verbose)
- fprintf (dump, "// Ignoring type %s\n", type_names[t]);
- if (GET_CODE (PATTERN (insn)) == ASM_INPUT
- || asm_noperands (PATTERN (insn)) >= 0)
- {
- /* This must be some kind of asm. Clear the scheduling state. */
- rotate_two_bundles (sched_verbose ? dump : NULL);
- if (ia64_final_schedule)
- group_barrier_needed_p (insn);
- }
- return 1;
+ dfa_state_size = state_size ();
+ temp_dfa_state = xmalloc (dfa_state_size);
+ prev_cycle_state = xmalloc (dfa_state_size);
}
+ dfa_pre_cycle_insn = make_insn_raw (gen_pre_cycle ());
+ PREV_INSN (dfa_pre_cycle_insn) = NEXT_INSN (dfa_pre_cycle_insn) = NULL_RTX;
+ recog_memoized (dfa_pre_cycle_insn);
+ dfa_stop_insn = make_insn_raw (gen_insn_group_barrier (GEN_INT (3)));
+ PREV_INSN (dfa_stop_insn) = NEXT_INSN (dfa_stop_insn) = NULL_RTX;
+ recog_memoized (dfa_stop_insn);
+}
- /* This is _not_ just a sanity check. group_barrier_needed_p will update
- important state info. Don't delete this test. */
- if (ia64_final_schedule
- && group_barrier_needed_p (insn))
- abort ();
+/* The following function returns the pseudo insn DFA_PRE_CYCLE_INSN
+ used by the DFA insn scheduler. */
- sched_data.stopbit[sched_data.cur] = 0;
- sched_data.insns[sched_data.cur] = insn;
- sched_data.types[sched_data.cur] = t;
+static rtx
+ia64_dfa_pre_cycle_insn ()
+{
+ return dfa_pre_cycle_insn;
+}
- sched_data.cur++;
- if (sched_verbose)
- fprintf (dump, "// Scheduling insn %d of type %s\n",
- INSN_UID (insn), type_names[t]);
+/* The following function returns TRUE if PRODUCER (of type ilog or
+ ld) produces the address used by CONSUMER (of type st or stf). */
- if (GET_CODE (insn) == CALL_INSN && ia64_final_schedule)
- {
- schedule_stop (sched_verbose ? dump : NULL);
- sched_data.last_was_stop = 1;
- }
+int
+ia64_st_address_bypass_p (producer, consumer)
+ rtx producer;
+ rtx consumer;
+{
+ rtx dest, reg, mem;
- return 1;
+ if (producer == NULL_RTX || consumer == NULL_RTX)
+ abort ();
+ dest = ia64_single_set (producer);
+ if (dest == NULL_RTX || (reg = SET_DEST (dest)) == NULL_RTX
+ || (GET_CODE (reg) != REG && GET_CODE (reg) != SUBREG))
+ abort ();
+ if (GET_CODE (reg) == SUBREG)
+ reg = SUBREG_REG (reg);
+ dest = ia64_single_set (consumer);
+ if (dest == NULL_RTX || (mem = SET_DEST (dest)) == NULL_RTX
+ || GET_CODE (mem) != MEM)
+ abort ();
+ return reg_mentioned_p (reg, mem);
}
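A concrete case the check above is meant to accept (an assumed example, not taken from this patch); the ld variant below works the same way on the consumer's source operand:

/* producer:  add  r14 = r32, r33   (an ialu insn setting r14)
   consumer:  st8  [r14] = r35      (r14 appears inside the MEM)

   ia64_single_set extracts both SETs, any SUBREG around r14 is
   stripped, and reg_mentioned_p (r14, [r14]) is nonzero, so the
   store-address bypass applies.  */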
-/* Free data allocated by ia64_sched_init. */
+/* The following function returns TRUE if PRODUCER (of type ilog or
+ ld) produces the address used by CONSUMER (of type ld or fld). */
-static void
-ia64_sched_finish (dump, sched_verbose)
- FILE *dump;
- int sched_verbose;
+int
+ia64_ld_address_bypass_p (producer, consumer)
+ rtx producer;
+ rtx consumer;
{
- if (sched_verbose)
- fprintf (dump, "// Finishing schedule.\n");
- rotate_two_bundles (NULL);
- free (sched_types);
- free (sched_ready);
+ rtx dest, src, reg, mem;
+
+ if (producer == NULL_RTX || consumer == NULL_RTX)
+ abort ();
+ dest = ia64_single_set (producer);
+ if (dest == NULL_RTX || (reg = SET_DEST (dest)) == NULL_RTX
+ || (GET_CODE (reg) != REG && GET_CODE (reg) != SUBREG))
+ abort ();
+ if (GET_CODE (reg) == SUBREG)
+ reg = SUBREG_REG (reg);
+ src = ia64_single_set (consumer);
+ if (src == NULL_RTX || (mem = SET_SRC (src)) == NULL_RTX)
+ abort ();
+ if (GET_CODE (mem) == UNSPEC && XVECLEN (mem, 0) > 0)
+ mem = XVECEXP (mem, 0, 0);
+ while (GET_CODE (mem) == SUBREG || GET_CODE (mem) == ZERO_EXTEND)
+ mem = XEXP (mem, 0);
+ if (GET_CODE (mem) != MEM)
+ abort ();
+ return reg_mentioned_p (reg, mem);
+}
+
+/* The following function returns TRUE if INSN produces an address for a
+ load/store insn. We place such insns into an M slot because that
+ decreases their latency. */
+
+int
+ia64_produce_address_p (insn)
+ rtx insn;
+{
+ return insn->call;
}
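A note on the one-line body above, offered as an assumption rather than something this hunk states:

/* The insn's `call' bit appears to be reused here as a scratch flag;
   it is presumably set by the new ia64_dependencies_evaluation_hook
   for insns that feed load/store addresses, so a nonzero value does
   not mean the insn is an actual call.  */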
+
/* Emit pseudo-ops for the assembler to describe predicate relations.
At present this assumes that we only consider predicate pairs to
@@ -6887,111 +7006,6 @@ emit_predicate_relation_info ()
}
}
-/* Generate a NOP instruction of type T. We will never generate L type
- nops. */
-
-static rtx
-gen_nop_type (t)
- enum attr_type t;
-{
- switch (t)
- {
- case TYPE_M:
- return gen_nop_m ();
- case TYPE_I:
- return gen_nop_i ();
- case TYPE_B:
- return gen_nop_b ();
- case TYPE_F:
- return gen_nop_f ();
- case TYPE_X:
- return gen_nop_x ();
- default:
- abort ();
- }
-}
-
-/* After the last scheduling pass, fill in NOPs. It's easier to do this
- here than while scheduling. */
-
-static void
-ia64_emit_nops ()
-{
- rtx insn;
- const struct bundle *b = 0;
- int bundle_pos = 0;
-
- for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
- {
- rtx pat;
- enum attr_type t;
- pat = INSN_P (insn) ? PATTERN (insn) : const0_rtx;
- if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER)
- continue;
- if ((GET_CODE (pat) == UNSPEC && XINT (pat, 1) == UNSPEC_BUNDLE_SELECTOR)
- || GET_CODE (insn) == CODE_LABEL)
- {
- if (b)
- while (bundle_pos < 3)
- {
- emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn);
- bundle_pos++;
- }
- if (GET_CODE (insn) != CODE_LABEL)
- b = bundle + INTVAL (XVECEXP (pat, 0, 0));
- else
- b = 0;
- bundle_pos = 0;
- continue;
- }
- else if (GET_CODE (pat) == UNSPEC_VOLATILE
- && XINT (pat, 1) == UNSPECV_INSN_GROUP_BARRIER)
- {
- int t = INTVAL (XVECEXP (pat, 0, 0));
- if (b)
- while (bundle_pos < t)
- {
- emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn);
- bundle_pos++;
- }
- continue;
- }
-
- if (bundle_pos == 3)
- b = 0;
-
- if (b && INSN_P (insn))
- {
- t = ia64_safe_type (insn);
- if (asm_noperands (PATTERN (insn)) >= 0
- || GET_CODE (PATTERN (insn)) == ASM_INPUT)
- {
- while (bundle_pos < 3)
- {
- emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn);
- bundle_pos++;
- }
- continue;
- }
-
- if (t == TYPE_UNKNOWN)
- continue;
- while (bundle_pos < 3)
- {
- if (t == b->t[bundle_pos]
- || (t == TYPE_A && (b->t[bundle_pos] == TYPE_M
- || b->t[bundle_pos] == TYPE_I)))
- break;
-
- emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn);
- bundle_pos++;
- }
- if (bundle_pos < 3)
- bundle_pos++;
- }
- }
-}
-
/* Perform machine dependent operations on the rtl chain INSNS. */
void
@@ -7014,14 +7028,91 @@ ia64_reorg (insns)
{
timevar_push (TV_SCHED2);
ia64_final_schedule = 1;
+
+ initiate_bundle_states ();
+ ia64_nop = make_insn_raw (gen_nop ());
+ PREV_INSN (ia64_nop) = NEXT_INSN (ia64_nop) = NULL_RTX;
+ recog_memoized (ia64_nop);
+ clocks_length = get_max_uid () + 1;
+ stops_p = (char *) xmalloc (clocks_length);
+ memset (stops_p, 0, clocks_length);
+ if (ia64_tune == PROCESSOR_ITANIUM)
+ {
+ clocks = (int *) xmalloc (clocks_length * sizeof (int));
+ memset (clocks, 0, clocks_length * sizeof (int));
+ add_cycles = (int *) xmalloc (clocks_length * sizeof (int));
+ memset (add_cycles, 0, clocks_length * sizeof (int));
+ }
+ if (ia64_tune == PROCESSOR_ITANIUM2)
+ {
+ pos_1 = get_cpu_unit_code ("2_1");
+ pos_2 = get_cpu_unit_code ("2_2");
+ pos_3 = get_cpu_unit_code ("2_3");
+ pos_4 = get_cpu_unit_code ("2_4");
+ pos_5 = get_cpu_unit_code ("2_5");
+ pos_6 = get_cpu_unit_code ("2_6");
+ _0mii_ = get_cpu_unit_code ("2b_0mii.");
+ _0mmi_ = get_cpu_unit_code ("2b_0mmi.");
+ _0mfi_ = get_cpu_unit_code ("2b_0mfi.");
+ _0mmf_ = get_cpu_unit_code ("2b_0mmf.");
+ _0bbb_ = get_cpu_unit_code ("2b_0bbb.");
+ _0mbb_ = get_cpu_unit_code ("2b_0mbb.");
+ _0mib_ = get_cpu_unit_code ("2b_0mib.");
+ _0mmb_ = get_cpu_unit_code ("2b_0mmb.");
+ _0mfb_ = get_cpu_unit_code ("2b_0mfb.");
+ _0mlx_ = get_cpu_unit_code ("2b_0mlx.");
+ _1mii_ = get_cpu_unit_code ("2b_1mii.");
+ _1mmi_ = get_cpu_unit_code ("2b_1mmi.");
+ _1mfi_ = get_cpu_unit_code ("2b_1mfi.");
+ _1mmf_ = get_cpu_unit_code ("2b_1mmf.");
+ _1bbb_ = get_cpu_unit_code ("2b_1bbb.");
+ _1mbb_ = get_cpu_unit_code ("2b_1mbb.");
+ _1mib_ = get_cpu_unit_code ("2b_1mib.");
+ _1mmb_ = get_cpu_unit_code ("2b_1mmb.");
+ _1mfb_ = get_cpu_unit_code ("2b_1mfb.");
+ _1mlx_ = get_cpu_unit_code ("2b_1mlx.");
+ }
+ else
+ {
+ pos_1 = get_cpu_unit_code ("1_1");
+ pos_2 = get_cpu_unit_code ("1_2");
+ pos_3 = get_cpu_unit_code ("1_3");
+ pos_4 = get_cpu_unit_code ("1_4");
+ pos_5 = get_cpu_unit_code ("1_5");
+ pos_6 = get_cpu_unit_code ("1_6");
+ _0mii_ = get_cpu_unit_code ("1b_0mii.");
+ _0mmi_ = get_cpu_unit_code ("1b_0mmi.");
+ _0mfi_ = get_cpu_unit_code ("1b_0mfi.");
+ _0mmf_ = get_cpu_unit_code ("1b_0mmf.");
+ _0bbb_ = get_cpu_unit_code ("1b_0bbb.");
+ _0mbb_ = get_cpu_unit_code ("1b_0mbb.");
+ _0mib_ = get_cpu_unit_code ("1b_0mib.");
+ _0mmb_ = get_cpu_unit_code ("1b_0mmb.");
+ _0mfb_ = get_cpu_unit_code ("1b_0mfb.");
+ _0mlx_ = get_cpu_unit_code ("1b_0mlx.");
+ _1mii_ = get_cpu_unit_code ("1b_1mii.");
+ _1mmi_ = get_cpu_unit_code ("1b_1mmi.");
+ _1mfi_ = get_cpu_unit_code ("1b_1mfi.");
+ _1mmf_ = get_cpu_unit_code ("1b_1mmf.");
+ _1bbb_ = get_cpu_unit_code ("1b_1bbb.");
+ _1mbb_ = get_cpu_unit_code ("1b_1mbb.");
+ _1mib_ = get_cpu_unit_code ("1b_1mib.");
+ _1mmb_ = get_cpu_unit_code ("1b_1mmb.");
+ _1mfb_ = get_cpu_unit_code ("1b_1mfb.");
+ _1mlx_ = get_cpu_unit_code ("1b_1mlx.");
+ }
schedule_ebbs (rtl_dump_file);
+ finish_bundle_states ();
+ if (ia64_tune == PROCESSOR_ITANIUM)
+ {
+ free (add_cycles);
+ free (clocks);
+ }
+ free (stops_p);
+ emit_insn_group_barriers (rtl_dump_file, insns);
+
ia64_final_schedule = 0;
timevar_pop (TV_SCHED2);
-
- /* This relies on the NOTE_INSN_BASIC_BLOCK notes to be in the same
- place as they were during scheduling. */
- emit_insn_group_barriers (rtl_dump_file, insns);
- ia64_emit_nops ();
}
else
emit_all_insn_group_barriers (rtl_dump_file, insns);