summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorthopre01 <thopre01@138bc75d-0d04-0410-961f-82ee72b054a4>2015-04-28 08:10:44 +0000
committerthopre01 <thopre01@138bc75d-0d04-0410-961f-82ee72b054a4>2015-04-28 08:10:44 +0000
commitee7ef7abf6b938b36acffc34f15ca5097b63702a (patch)
treefeebbec54d03722d68e3d93fcfbe99957828583a
parent562b4de26b344a1357531d8f215efc0e8f8ab9a9 (diff)
downloadgcc-ee7ef7abf6b938b36acffc34f15ca5097b63702a.tar.gz
2015-04-28 Thomas Preud'homme <thomas.preudhomme@arm.com>
gcc/ PR target/63503 * config.gcc: Add cortex-a57-fma-steering.o to extra_objs for aarch64-*-*. * config/aarch64/t-aarch64: Add a rule for cortex-a57-fma-steering.o. * config/aarch64/aarch64.h (AARCH64_FL_USE_FMA_STEERING_PASS): Define. (AARCH64_TUNE_FMA_STEERING): Likewise. * config/aarch64/aarch64-cores.def: Set AARCH64_FL_USE_FMA_STEERING_PASS for cores with dynamic steering of FMUL/FMADD instructions. * config/aarch64/aarch64.c (aarch64_register_fma_steering): Declare. (aarch64_override_options): Include cortex-a57-fma-steering.h. Call aarch64_register_fma_steering () if AARCH64_TUNE_FMA_STEERING is true. * config/aarch64/cortex-a57-fma-steering.h: New file. * config/aarch64/cortex-a57-fma-steering.c: Likewise. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@222512 138bc75d-0d04-0410-961f-82ee72b054a4
-rw-r--r--gcc/ChangeLog17
-rw-r--r--gcc/config.gcc2
-rw-r--r--gcc/config/aarch64/aarch64-cores.def4
-rw-r--r--gcc/config/aarch64/aarch64.c4
-rw-r--r--gcc/config/aarch64/aarch64.h4
-rw-r--r--gcc/config/aarch64/cortex-a57-fma-steering.c1088
-rw-r--r--gcc/config/aarch64/cortex-a57-fma-steering.h22
-rw-r--r--gcc/config/aarch64/t-aarch649
8 files changed, 1147 insertions, 3 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 66d86d9532b..070a150b31d 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,20 @@
+2015-04-28 Thomas Preud'homme <thomas.preudhomme@arm.com>
+
+ PR target/63503
+ * config.gcc: Add cortex-a57-fma-steering.o to extra_objs for
+ aarch64-*-*.
+ * config/aarch64/t-aarch64: Add a rule for cortex-a57-fma-steering.o.
+ * config/aarch64/aarch64.h (AARCH64_FL_USE_FMA_STEERING_PASS): Define.
+ (AARCH64_TUNE_FMA_STEERING): Likewise.
+ * config/aarch64/aarch64-cores.def: Set
+ AARCH64_FL_USE_FMA_STEERING_PASS for cores with dynamic steering of
+ FMUL/FMADD instructions.
+ * config/aarch64/aarch64.c (aarch64_register_fma_steering): Declare.
+ (aarch64_override_options): Include cortex-a57-fma-steering.h. Call
+ aarch64_register_fma_steering () if AARCH64_TUNE_FMA_STEERING is true.
+ * config/aarch64/cortex-a57-fma-steering.h: New file.
+ * config/aarch64/cortex-a57-fma-steering.c: Likewise.
+
2015-04-28 Richard Sandiford <richard.sandiford@arm.com>
* gensupport.c (std_preds): Add missing codes to address_operand entry.
diff --git a/gcc/config.gcc b/gcc/config.gcc
index 5c48cd0c682..f21421b1670 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -302,7 +302,7 @@ m32c*-*-*)
aarch64*-*-*)
cpu_type=aarch64
extra_headers="arm_neon.h arm_acle.h"
- extra_objs="aarch64-builtins.o aarch-common.o"
+ extra_objs="aarch64-builtins.o aarch-common.o cortex-a57-fma-steering.o"
target_gtfiles="\$(srcdir)/config/aarch64/aarch64-builtins.c"
target_has_targetm_common=yes
;;
diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def
index 7c285ba52cc..dfc9cc85413 100644
--- a/gcc/config/aarch64/aarch64-cores.def
+++ b/gcc/config/aarch64/aarch64-cores.def
@@ -40,7 +40,7 @@
/* V8 Architecture Processors. */
AARCH64_CORE("cortex-a53", cortexa53, cortexa53, 8, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa53, "0x41", "0xd03")
-AARCH64_CORE("cortex-a57", cortexa57, cortexa57, 8, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, "0x41", "0xd07")
+AARCH64_CORE("cortex-a57", cortexa57, cortexa57, 8, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_USE_FMA_STEERING_PASS, cortexa57, "0x41", "0xd07")
AARCH64_CORE("cortex-a72", cortexa72, cortexa57, 8, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, "0x41", "0xd08")
AARCH64_CORE("exynos-m1", exynosm1, cortexa57, 8, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, cortexa57, "0x53", "0x001")
AARCH64_CORE("thunderx", thunderx, thunderx, 8, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, "0x43", "0x0a1")
@@ -48,5 +48,5 @@ AARCH64_CORE("xgene1", xgene1, xgene1, 8, AARCH64_FL_FOR_ARCH8, xgen
/* V8 big.LITTLE implementations. */
-AARCH64_CORE("cortex-a57.cortex-a53", cortexa57cortexa53, cortexa53, 8, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, "0x41", "0xd07.0xd03")
+AARCH64_CORE("cortex-a57.cortex-a53", cortexa57cortexa53, cortexa53, 8, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_USE_FMA_STEERING_PASS, cortexa57, "0x41", "0xd07.0xd03")
AARCH64_CORE("cortex-a72.cortex-a53", cortexa72cortexa53, cortexa53, 8, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, "0x41", "0xd08.0xd03")
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index dd92934bdec..b8a91dc8fe5 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -95,6 +95,7 @@
#include "rtl-iter.h"
#include "tm-constrs.h"
#include "sched-int.h"
+#include "cortex-a57-fma-steering.h"
/* Defined for convenience. */
#define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)
@@ -6884,6 +6885,9 @@ aarch64_override_options (void)
align_functions = aarch64_tune_params->function_align;
}
+ if (AARCH64_TUNE_FMA_STEERING)
+ aarch64_register_fma_steering ();
+
aarch64_override_options_after_change ();
}
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 1f7187bab9b..3fd1b3f0d32 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -200,6 +200,8 @@ extern unsigned aarch64_architecture_version;
#define AARCH64_FL_CRYPTO (1 << 2) /* Has crypto. */
#define AARCH64_FL_SLOWMUL (1 << 3) /* A slow multiply core. */
#define AARCH64_FL_CRC (1 << 4) /* Has CRC. */
+/* Has static dispatch of FMA. */
+#define AARCH64_FL_USE_FMA_STEERING_PASS (1 << 5)
/* Has FP and SIMD. */
#define AARCH64_FL_FPSIMD (AARCH64_FL_FP | AARCH64_FL_SIMD)
@@ -220,6 +222,8 @@ extern unsigned long aarch64_isa_flags;
/* Macros to test tuning flags. */
extern unsigned long aarch64_tune_flags;
#define AARCH64_TUNE_SLOWMUL (aarch64_tune_flags & AARCH64_FL_SLOWMUL)
+#define AARCH64_TUNE_FMA_STEERING \
+ (aarch64_tune_flags & AARCH64_FL_USE_FMA_STEERING_PASS)
/* Crypto is an optional extension to AdvSIMD. */
#define TARGET_CRYPTO (TARGET_SIMD && AARCH64_ISA_CRYPTO)
diff --git a/gcc/config/aarch64/cortex-a57-fma-steering.c b/gcc/config/aarch64/cortex-a57-fma-steering.c
new file mode 100644
index 00000000000..3d7557ac2b4
--- /dev/null
+++ b/gcc/config/aarch64/cortex-a57-fma-steering.c
@@ -0,0 +1,1088 @@
+/* FMA steering optimization pass for Cortex-A57.
+ Copyright (C) 2015 Free Software Foundation, Inc.
+ Contributed by ARM Ltd.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "regs.h"
+#include "insn-config.h"
+#include "hard-reg-set.h"
+#include "dominance.h"
+#include "cfg.h"
+#include "cfganal.h"
+#include "predict.h"
+#include "basic-block.h"
+#include "insn-attr.h"
+#include "machmode.h"
+#include "recog.h"
+#include "output.h"
+#include "vec.h"
+#include "hash-map.h"
+#include "bitmap.h"
+#include "obstack.h"
+#include "df.h"
+#include "target.h"
+#include "rtl.h"
+#include "context.h"
+#include "tree-pass.h"
+#include "regrename.h"
+#include "cortex-a57-fma-steering.h"
+
+#include <list>
+
+/* For better performance, the destination of FMADD/FMSUB instructions should
+ have the same parity as their accumulator register if the accumulator
+ contains the result of a previous FMUL or FMADD/FMSUB instruction if
+ targetting Cortex-A57 processors. Performance is also increased by
+ otherwise keeping a good balance in the parity of the destination register
+ of FMUL or FMADD/FMSUB.
+
+ This pass ensure that registers are renamed so that these conditions hold.
+ We reuse the existing register renaming facility from regrename.c to build
+ dependency chains and expose candidate registers for renaming.
+
+
+ The algorithm has three steps:
+
+ First, the functions of the register renaming pass are called. These
+ analyze the instructions and produce a list of def/use chains of
+ instructions.
+
+ Next, this information is used to build trees of multiply and
+ multiply-accumulate instructions. The roots of these trees are any
+ multiply, or any multiply-accumulate whose accumulator is not dependent on
+ a multiply or multiply-accumulate instruction. A child is added to the
+ tree where a dependency chain exists between the result of the parent
+ instruction and the accumulator operand of the child, as in the diagram
+ below:
+
+ fmul s2, s0, s1
+ / \
+ fmadd s0, s1, s1, s2 fmadd s4, s1, s1 s2
+ |
+ fmadd s3, s1, s1, s0
+
+ Trees made of a single instruction are permitted.
+
+ Finally, renaming is performed. The parity of the destination register at
+ the root of a tree is checked against the current balance of multiply and
+ multiply-accumulate on each pipeline. If necessary, the root of a tree is
+ renamed, in which case the rest of the tree is then renamed to keep the same
+ parity in the destination registers of all instructions in the tree. */
+
+
+
+/* Forward declarations. */
+class fma_node;
+class fma_root_node;
+class func_fma_steering;
+
+/* Dependencies between FMUL or FMADD/FMSUB instructions and subsequent
+ FMADD/FMSUB instructions form a graph. This is because alternatives can
+ make a register be set by several FMUL or FMADD/FMSUB instructions in
+ different basic blocks and because of loops. For ease of browsing, the
+ connected components of this graph are broken up into forests of trees.
+ Forests are represented by fma_forest objects, contained in the fma_forests
+ list. Using a separate object for the forests allows for a better use of
+ memory as there is some information that is global to each forest, such as
+ the number of FMSUB and FMADD/FMSUB instructions currently scheduled on each
+ floating-point execution pipelines. */
+
+class fma_forest
+{
+public:
+ fma_forest (func_fma_steering *, fma_root_node *, int);
+ ~fma_forest ();
+
+ int get_id ();
+ std::list<fma_root_node *> *get_roots ();
+ func_fma_steering *get_globals ();
+ int get_target_parity ();
+ void fma_node_created (fma_node *);
+ void merge_forest (fma_forest *);
+ void dump_info ();
+ void dispatch ();
+
+private:
+ /* The list of roots that form this forest. */
+ std::list<fma_root_node *> *m_roots;
+
+ /* Target parity the destination register of all FMUL and FMADD/FMSUB
+ instructions in this forest should have. */
+ int m_target_parity;
+
+ /* Link to the instance of func_fma_steering holding data related to the
+ FMA steering of the current function (cfun). */
+ func_fma_steering *m_globals;
+
+ /* Identifier for the forest (used for dumps). */
+ int m_id;
+
+ /* Total number of nodes in the forest (for statistics). */
+ int m_nb_nodes;
+};
+
+class fma_node
+{
+public:
+ fma_node (fma_node *parent, du_chain *chain);
+ ~fma_node ();
+
+ bool root_p ();
+ fma_forest *get_forest ();
+ std::list<fma_node *> *get_children ();
+ rtx_insn *get_insn ();
+ void add_child (fma_node *);
+ int get_parity ();
+ void set_head (du_head *);
+ void rename (fma_forest *);
+ void dump_info (fma_forest *);
+
+protected:
+ /* Root node that lead to this node. */
+ fma_root_node *m_root;
+
+ /* The parent node of this node. If the node belong to a chain with several
+ parent nodes, the first one encountered in a depth-first search is chosen
+ as canonical parent. */
+ fma_node *m_parent;
+
+ /* The list of child nodes. If a chain contains several parent nodes, one is
+ chosen as canonical parent and the others will have no children. */
+ std::list<fma_node *> *m_children;
+
+ /* The associated DU_HEAD chain that the insn represented by this object
+ is (one of) the root of. When a chain contains several roots, the non
+ canonical ones have this field set to NULL. */
+ struct du_head *m_head;
+
+ /* The FMUL or FMADD/FMSUB instruction this object corresponds to. */
+ rtx_insn *m_insn;
+};
+
+class fma_root_node : public fma_node
+{
+public:
+ fma_root_node (func_fma_steering *, du_chain *, int);
+
+ fma_forest *get_forest ();
+ void set_forest (fma_forest *);
+ void dump_info (fma_forest *);
+
+private:
+ /* The forest this node belonged to when it was created. */
+ fma_forest *m_forest;
+};
+
+/* Class holding all data and methods relative to the FMA steering of a given
+ function. The FMA steering pass could then run in parallel for different
+ functions. */
+
+class func_fma_steering
+{
+public:
+ func_fma_steering ();
+ ~func_fma_steering ();
+
+ int get_fpu_balance ();
+ void remove_forest (fma_forest *);
+ bool put_node (fma_node *);
+ void update_balance (int);
+ fma_node *get_fma_node (rtx_insn *);
+ void analyze_fma_fmul_insn (fma_forest *, du_chain *, du_head_p);
+ void execute_fma_steering ();
+
+private:
+ void dfs (void (*) (fma_forest *), void (*) (fma_forest *, fma_root_node *),
+ void (*) (fma_forest *, fma_node *), bool);
+ void analyze ();
+ void rename_fma_trees ();
+
+ /* Mapping between FMUL or FMADD/FMSUB instructions and the associated
+ fma_node object. Used when analyzing an instruction that is a root of
+ a chain to find if such an object was created because this instruction
+ is also a use in another chain. */
+ hash_map<rtx_insn *, fma_node *> *m_insn_fma_head_map;
+
+ /* A list of all the forests in a given function. */
+ std::list<fma_forest *> m_fma_forests;
+
+ /* Balance of FMUL and FMADD/FMSUB instructions between the two FPU
+ pipelines:
+ < 0: more instruction dispatched to the first pipeline
+ == 0: perfect balance
+ > 0: more instruction dispatched to the second pipeline. */
+ int m_fpu_balance;
+
+ /* Identifier for the next forest created. */
+ int m_next_forest_id;
+};
+
+/* Rename the register HEAD->regno in all the insns in the chain HEAD to any
+ register not in the set UNAVAILABLE. Adapted from rename_chains in
+ regrename.c. */
+
+static bool
+rename_single_chain (du_head_p head, HARD_REG_SET *unavailable)
+{
+ int best_new_reg;
+ int n_uses = 0;
+ struct du_chain *tmp;
+ int reg = head->regno;
+ enum reg_class super_class = NO_REGS;
+
+ if (head->cannot_rename)
+ return false;
+
+ if (fixed_regs[reg] || global_regs[reg]
+ || (frame_pointer_needed && reg == HARD_FRAME_POINTER_REGNUM))
+ return false;
+
+ /* Iterate over elements in the chain in order to:
+ 1. Count number of uses, and narrow the set of registers we can
+ use for renaming.
+ 2. Compute the superunion of register classes in this chain. */
+ for (tmp = head->first; tmp; tmp = tmp->next_use)
+ {
+ if (DEBUG_INSN_P (tmp->insn))
+ continue;
+ n_uses++;
+ IOR_COMPL_HARD_REG_SET (*unavailable, reg_class_contents[tmp->cl]);
+ super_class = reg_class_superunion[(int) super_class][(int) tmp->cl];
+ }
+
+ if (n_uses < 1)
+ return false;
+
+ best_new_reg = find_rename_reg (head, super_class, unavailable, reg,
+ false);
+
+ if (dump_file)
+ {
+ fprintf (dump_file, "Register %s in insn %d", reg_names[reg],
+ INSN_UID (head->first->insn));
+ if (head->need_caller_save_reg)
+ fprintf (dump_file, " crosses a call");
+ }
+
+ if (best_new_reg == reg)
+ {
+ if (dump_file)
+ fprintf (dump_file, "; no available better choice\n");
+ return false;
+ }
+
+ if (dump_file)
+ fprintf (dump_file, ", renamed as %s\n", reg_names[best_new_reg]);
+
+ regrename_do_replace (head, best_new_reg);
+ df_set_regs_ever_live (best_new_reg, true);
+ return true;
+}
+
+/* Return whether T is the attribute of a FMADD/FMSUB-like instruction. */
+
+static bool
+is_fmac_op (enum attr_type t)
+{
+ return (t == TYPE_FMACS) || (t == TYPE_FMACD) || (t == TYPE_NEON_FP_MLA_S);
+}
+
+/* Return whether T is the attribute of a FMUL instruction. */
+
+static bool
+is_fmul_op (enum attr_type t)
+{
+ return (t == TYPE_FMULS) || (t == TYPE_FMULD) || (t == TYPE_NEON_FP_MUL_S);
+}
+
+/* Return whether INSN is an FMUL (if FMUL_OK is true) or FMADD/FMSUB
+ instruction. */
+
+static bool
+is_fmul_fmac_insn (rtx_insn *insn, bool fmul_ok)
+{
+ enum attr_type t;
+
+ if (!NONDEBUG_INSN_P (insn))
+ return false;
+
+ if (recog_memoized (insn) < 0)
+ return false;
+
+ /* Only consider chain(s) this instruction is a root of if this is an FMUL or
+ FMADD/FMSUB instruction. This allows to avoid browsing chains of all
+ instructions for FMUL or FMADD/FMSUB in them. */
+ t = get_attr_type (insn);
+ return is_fmac_op (t) || (fmul_ok && is_fmul_op (t));
+}
+
+
+/*
+ * Class fma_forest method definitions.
+ */
+
+fma_forest::fma_forest (func_fma_steering *fma_steer, fma_root_node *fma_root,
+ int id)
+{
+ memset (this, 0, sizeof (*this));
+ this->m_globals = fma_steer;
+ this->m_roots = new std::list<fma_root_node *>;
+ this->m_roots->push_back (fma_root);
+ this->m_id = id;
+}
+
+fma_forest::~fma_forest ()
+{
+ delete this->m_roots;
+}
+
+int
+fma_forest::get_id ()
+{
+ return this->m_id;
+}
+
+std::list<fma_root_node *> *
+fma_forest::get_roots ()
+{
+ return this->m_roots;
+}
+
+func_fma_steering *
+fma_forest::get_globals ()
+{
+ return this->m_globals;
+}
+
+int
+fma_forest::get_target_parity ()
+{
+ return this->m_target_parity;
+}
+
+/* Act on the creation of NODE by updating statistics in FOREST and adding an
+ entry for it in the func_fma_steering hashmap. */
+
+void fma_forest::fma_node_created (fma_node *node)
+{
+ bool created = !this->m_globals->put_node (node);
+
+ gcc_assert (created);
+ this->m_nb_nodes++;
+}
+
+/* Merge REF_FOREST and OTHER_FOREST together, making REF_FOREST the canonical
+ fma_forest object to represent both. */
+
+void
+fma_forest::merge_forest (fma_forest *other_forest)
+{
+ std::list<fma_root_node *> *other_roots;
+ std::list<fma_root_node *>::iterator other_root_iter;
+
+ if (this == other_forest)
+ return;
+
+ other_roots = other_forest->m_roots;
+
+ /* Update root nodes' pointer to forest. */
+ for (other_root_iter = other_roots->begin ();
+ other_root_iter != other_roots->end (); other_root_iter++)
+ (*other_root_iter)->set_forest (this);
+
+ /* Remove other_forest from the list of forests and move its tree roots in
+ the list of tree roots of ref_forest. */
+ this->m_globals->remove_forest (other_forest);
+ this->m_roots->splice (this->m_roots->begin (), *other_roots);
+ delete other_forest;
+
+ this->m_nb_nodes += other_forest->m_nb_nodes;
+}
+
+/* Dump information about the forest FOREST. */
+
+void
+fma_forest::dump_info ()
+{
+ gcc_assert (dump_file);
+
+ fprintf (dump_file, "Forest #%d has %d nodes\n", this->m_id,
+ this->m_nb_nodes);
+}
+
+/* Wrapper around fma_forest::dump_info for use as parameter of function
+ pointer type in func_fma_steering::dfs. */
+
+static void
+dump_forest_info (fma_forest *forest)
+{
+ forest->dump_info ();
+}
+
+/* Dispatch forest to the least utilized pipeline. */
+
+void
+fma_forest::dispatch ()
+{
+ this->m_target_parity = this->m_roots->front ()->get_parity ();
+ int fpu_balance = this->m_globals->get_fpu_balance ();
+ if (fpu_balance != 0)
+ this->m_target_parity = (fpu_balance < 0);
+
+ if (dump_file)
+ fprintf (dump_file, "Target parity for forest #%d: %s\n", this->m_id,
+ this->m_target_parity ? "odd" : "even");
+}
+
+/* Wrapper around fma_forest::dispatch for use as parameter of function pointer
+ type in func_fma_steering::dfs. */
+
+static void
+dispatch_forest (fma_forest *forest)
+{
+ forest->dispatch ();
+}
+
+fma_node::fma_node (fma_node *parent, du_chain *chain)
+{
+ memset (this, 0, sizeof (*this));
+ this->m_parent = parent;
+ this->m_children = new std::list<fma_node *>;
+ this->m_insn = chain->insn;
+ /* root_p () cannot be used to check for root before root is set. */
+ if (this->m_parent == this)
+ this->m_root = static_cast<fma_root_node *> (parent);
+ else
+ {
+ this->m_root = parent->m_root;
+ this->get_forest ()->fma_node_created (this);
+ }
+}
+
+fma_node::~fma_node ()
+{
+ delete this->m_children;
+}
+
+std::list<fma_node *> *
+fma_node::get_children ()
+{
+ return this->m_children;
+}
+
+rtx_insn *
+fma_node::get_insn ()
+{
+ return this->m_insn;
+}
+
+void
+fma_node::set_head (du_head *head)
+{
+ gcc_assert (!this->m_head);
+ this->m_head = head;
+}
+
+/* Add a child to this node in the list of children. */
+
+void
+fma_node::add_child (fma_node *child)
+{
+ this->m_children->push_back (child);
+}
+
+/* Return the parity of the destination register of the instruction represented
+ by this node. */
+
+int
+fma_node::get_parity ()
+{
+ return this->m_head->regno % 2;
+}
+
+/* Get the actual forest associated with a non root node as the one the node
+ points to might have been merged into another one. In that case the pointer
+ in the root nodes are updated so we return the forest pointer of a root node
+ pointed to by the initial forest. Despite being a oneliner, this method is
+ defined here as it references a method from fma_root_node. */
+
+fma_forest *
+fma_node::get_forest ()
+{
+ return this->m_root->get_forest ();
+}
+
+/* Return whether a node is a root node. */
+
+bool
+fma_node::root_p ()
+{
+ return this->m_root == this;
+}
+
+/* Dump information about the children of node FMA_NODE in forest FOREST. */
+
+void
+fma_node::dump_info (ATTRIBUTE_UNUSED fma_forest *forest)
+{
+ struct du_chain *chain;
+ std::list<fma_node *>::iterator fma_child;
+
+ gcc_assert (dump_file);
+
+ if (this->get_children ()->empty ())
+ return;
+
+ fprintf (dump_file, "Instruction(s)");
+ for (chain = this->m_head->first; chain; chain = chain->next_use)
+ {
+ if (!is_fmul_fmac_insn (chain->insn, true))
+ continue;
+
+ if (chain->loc != &SET_DEST (PATTERN (chain->insn)))
+ continue;
+
+ fprintf (dump_file, " %d", INSN_UID (chain->insn));
+ }
+
+ fprintf (dump_file, " is(are) accumulator dependency of instructions");
+ for (fma_child = this->get_children ()->begin ();
+ fma_child != this->get_children ()->end (); fma_child++)
+ fprintf (dump_file, " %d", INSN_UID ((*fma_child)->m_insn));
+ fprintf (dump_file, "\n");
+}
+
+/* Wrapper around fma_node::dump_info for use as parameter of function pointer
+ type in func_fma_steering::dfs. */
+
+static void
+dump_tree_node_info (fma_forest *forest, fma_node *node)
+{
+ node->dump_info (forest);
+}
+
+/* Rename the destination register of a single FMUL or FMADD/FMSUB instruction
+ represented by FMA_NODE to a register that respect the target parity for
+ FOREST or with same parity of the instruction represented by its parent node
+ if it has one. */
+
+void
+fma_node::rename (fma_forest *forest)
+{
+ int cur_parity, target_parity;
+
+ /* This is alternate root of a chain and thus has no children. It will be
+ renamed when processing the canonical root for that chain. */
+ if (!this->m_head)
+ return;
+
+ target_parity = forest->get_target_parity ();
+ if (this->m_parent)
+ target_parity = this->m_parent->get_parity ();
+ cur_parity = this->get_parity ();
+
+ /* Rename if parity differs. */
+ if (cur_parity != target_parity)
+ {
+ rtx_insn *insn = this->m_insn;
+ HARD_REG_SET unavailable;
+ enum machine_mode mode;
+ int reg;
+
+ if (dump_file)
+ {
+ unsigned cur_dest_reg = this->m_head->regno;
+
+ fprintf (dump_file, "FMA or FMUL at insn %d but destination "
+ "register (%s) has different parity from expected to "
+ "maximize FPU pipeline utilization\n", INSN_UID (insn),
+ reg_names[cur_dest_reg]);
+ }
+
+ /* Don't clobber traceback for noreturn functions. */
+ CLEAR_HARD_REG_SET (unavailable);
+ if (frame_pointer_needed)
+ {
+ add_to_hard_reg_set (&unavailable, Pmode, FRAME_POINTER_REGNUM);
+ add_to_hard_reg_set (&unavailable, Pmode, HARD_FRAME_POINTER_REGNUM);
+ }
+
+ /* Exclude registers with wrong parity. */
+ mode = GET_MODE (SET_DEST (PATTERN (insn)));
+ for (reg = cur_parity; reg < FIRST_PSEUDO_REGISTER; reg += 2)
+ add_to_hard_reg_set (&unavailable, mode, reg);
+
+ if (!rename_single_chain (this->m_head, &unavailable))
+ {
+ if (dump_file)
+ fprintf (dump_file, "Destination register of insn %d could not be "
+ "renamed. Dependent FMA insns will use this parity from "
+ "there on.\n", INSN_UID (insn));
+ }
+ else
+ cur_parity = target_parity;
+ }
+
+ forest->get_globals ()->update_balance (cur_parity);
+}
+
+/* Wrapper around fma_node::dump_info for use as parameter of function pointer
+ type in func_fma_steering::dfs. */
+
+static void
+rename_fma_node (fma_forest *forest, fma_node *node)
+{
+ node->rename (forest);
+}
+
+fma_root_node::fma_root_node (func_fma_steering *globals, du_chain *chain,
+ int id) : fma_node (this, chain)
+{
+ this->m_forest = new fma_forest (globals, this, id);
+ this->m_forest->fma_node_created (this);
+}
+
+fma_forest *
+fma_root_node::get_forest ()
+{
+ return this->m_forest;
+}
+
+void
+fma_root_node::set_forest (fma_forest *ref_forest)
+{
+ this->m_forest = ref_forest;
+}
+
+/* Dump information about the roots of forest FOREST. */
+
+void
+fma_root_node::dump_info (fma_forest *forest)
+{
+ gcc_assert (dump_file);
+
+ if (this == forest->get_roots ()->front ())
+ fprintf (dump_file, "Instruction(s) at root of forest #%d:",
+ forest->get_id ());
+ fprintf (dump_file, " %d", INSN_UID (this->m_insn));
+ if (this == forest->get_roots ()->back ())
+ fprintf (dump_file, "\n");
+}
+
+/* Wrapper around fma_root_node::dump_info for use as parameter of function
+ pointer type in func_fma_steering::dfs. */
+
+static void
+dump_tree_root_info (fma_forest *forest, fma_root_node *node)
+{
+ node->dump_info (forest);
+}
+
+func_fma_steering::func_fma_steering () : m_fpu_balance (0)
+{
+ this->m_insn_fma_head_map = new hash_map<rtx_insn *, fma_node *>;
+ this->m_fma_forests.clear ();
+ this->m_next_forest_id = 0;
+}
+
+func_fma_steering::~func_fma_steering ()
+{
+ delete this->m_insn_fma_head_map;
+}
+
+int
+func_fma_steering::get_fpu_balance ()
+{
+ return this->m_fpu_balance;
+}
+
+void
+func_fma_steering::remove_forest (fma_forest *forest)
+{
+ this->m_fma_forests.remove (forest);
+}
+
+/* Memorize the mapping of this instruction to its fma_node object and return
+ whether such a mapping existed. */
+
+bool
+func_fma_steering::put_node (fma_node *node)
+{
+ return this->m_insn_fma_head_map->put (node->get_insn (), node);
+}
+
+/* Update the current balance considering a node with the given PARITY. */
+
+void
+func_fma_steering::update_balance (int parity)
+{
+ this->m_fpu_balance = parity ? this->m_fpu_balance + 1
+ : this->m_fpu_balance - 1;
+}
+
+/* Return whether an fma_node object exists for instruction INSN and, if not,
+ allocate one in *RET. */
+
+fma_node *
+func_fma_steering::get_fma_node (rtx_insn *insn)
+{
+ fma_node **fma_slot;
+
+ fma_slot = this->m_insn_fma_head_map->get (insn);
+ if (fma_slot)
+ return *fma_slot;
+ return NULL;
+}
+
+/* Allocate and initialize fma_node objects for the FMUL or FMADD/FMSUB
+ instruction in CHAIN->insn and its dependent FMADD/FMSUB instructions, all
+ part of FOREST. For the children, the associated head is left untouched
+ (and thus null) as this function will be called again when considering the
+ chain where they are def. For the parent, the chain is given in HEAD. */
+
+void
+func_fma_steering::analyze_fma_fmul_insn (fma_forest *ref_forest,
+ du_chain *chain, du_head_p head)
+{
+ fma_forest *forest;
+ fma_node *node = this->get_fma_node (chain->insn);
+
+ /* This is a root node. */
+ if (!node)
+ {
+ fma_root_node *root_node;
+
+ root_node = new fma_root_node (this, chain, this->m_next_forest_id++);
+ forest = root_node->get_forest ();
+ node = root_node;
+
+ /* Until proved otherwise, assume this root is not part of an existing
+ forest and thus add its forest to the list of forests. */
+ this->m_fma_forests.push_back (forest);
+ }
+ else
+ forest = node->get_forest ();
+
+ node->set_head (head);
+
+ /* fma_node is part of a chain with several defs, one of them having already
+ been processed. The root of that already processed def is the canonical
+ one and the root of fma_node is added to its forest. No need to process
+ the children nodes as they were already processed when the other def was
+ processed. */
+ if (ref_forest)
+ {
+ ref_forest->merge_forest (forest);
+ return;
+ }
+
+ for (chain = head->first; chain; chain = chain->next_use)
+ {
+ fma_node *child_fma;
+ rtx fma_rtx, *accum_rtx_p;
+
+ if (!is_fmul_fmac_insn (chain->insn, false))
+ continue;
+
+ /* Get FMA rtx. */
+ fma_rtx = SET_SRC (PATTERN (chain->insn));
+ /* FMA is negated. */
+ if (GET_CODE (fma_rtx) == NEG)
+ fma_rtx = XEXP (fma_rtx, 0);
+ /* Get accumulator rtx. */
+ accum_rtx_p = &XEXP (fma_rtx, 2);
+ /* Accumulator is negated. */
+ if (!REG_P (*accum_rtx_p))
+ accum_rtx_p = &XEXP (*accum_rtx_p, 0);
+
+ /* This du_chain structure is not for the accumulator register. */
+ if (accum_rtx_p != chain->loc)
+ continue;
+
+ /* If object already created, this is a loop carried dependency. We
+ don't include this object in the children as we want trees for
+ rename_fma_trees to not be an infinite loop. */
+ if (this->get_fma_node (chain->insn))
+ continue;
+
+ child_fma = new fma_node (node, chain);
+
+ /* Memorize the mapping of this instruction to its fma_node object
+ as it will be processed for the chain starting at its destination
+ register later. */
+
+ /* Link to siblings. */
+ node->add_child (child_fma);
+ }
+}
+
+/* Perform a depth-first search of the forests of fma_node in
+ THIS->m_fma_forests, calling PROCESS_FOREST () on each fma_forest object in
+ THIS->m_fma_forests list, PROCESS_ROOT () on each tree root and
+ PROCESS_NODE () on each node. If FREE is true, free all std::list in the
+ same dfs. */
+
+void
+func_fma_steering::dfs (void (*process_forest) (fma_forest *),
+ void (*process_root) (fma_forest *, fma_root_node *),
+ void (*process_node) (fma_forest *, fma_node *),
+ bool free)
+{
+ vec<fma_node *> to_process;
+ std::list<fma_forest *>::iterator forest_iter;
+
+ to_process.create (0);
+
+ /* For each forest. */
+ for (forest_iter = this->m_fma_forests.begin ();
+ forest_iter != this->m_fma_forests.end (); forest_iter++)
+ {
+ std::list<fma_root_node *>::iterator root_iter;
+
+ if (process_forest)
+ process_forest (*forest_iter);
+
+ /* For each tree root in this forest. */
+ for (root_iter = (*forest_iter)->get_roots ()->begin ();
+ root_iter != (*forest_iter)->get_roots ()->end (); root_iter++)
+ {
+ if (process_root)
+ process_root (*forest_iter, *root_iter);
+ to_process.safe_push (*root_iter);
+ }
+
+ /* For each tree node in this forest. */
+ while (!to_process.is_empty ())
+ {
+ fma_node *node;
+ std::list<fma_node *>::iterator child_iter;
+
+ node = to_process.pop ();
+
+ if (process_node)
+ process_node (*forest_iter, node);
+
+ /* Absence of children might indicate an alternate root of a *chain*.
+ It's ok to skip it here as the chain will be renamed when
+ processing the canonical root for that chain. */
+ if (node->get_children ()->empty ())
+ continue;
+
+ for (child_iter = node->get_children ()->begin ();
+ child_iter != node->get_children ()->end (); child_iter++)
+ to_process.safe_push (*child_iter);
+ if (free)
+ {
+ if (node->root_p ())
+ delete static_cast<fma_root_node *> (node);
+ else
+ delete node;
+ }
+ }
+ if (free)
+ delete *forest_iter;
+ }
+
+ to_process.release ();
+}
+
+/* Build the dependency trees of FMUL and FMADD/FMSUB instructions. */
+
+void
+func_fma_steering::analyze ()
+{
+ int i, n_blocks, *bb_dfs_preorder;
+ basic_block bb;
+ rtx_insn *insn;
+
+ bb_dfs_preorder = XNEWVEC (int, last_basic_block_for_fn (cfun));
+ n_blocks = pre_and_rev_post_order_compute (bb_dfs_preorder, NULL, false);
+
+ /* Browse the graph of basic blocks looking for FMUL or FMADD/FMSUB
+ instructions. */
+ for (i = 0; i < n_blocks; i++)
+ {
+ bb = BASIC_BLOCK_FOR_FN (cfun, bb_dfs_preorder[i]);
+ FOR_BB_INSNS (bb, insn)
+ {
+ operand_rr_info *dest_op_info;
+ struct du_chain *chain;
+ unsigned dest_regno;
+ fma_forest *forest;
+ du_head_p head;
+ int i;
+
+ if (!is_fmul_fmac_insn (insn, true))
+ continue;
+
+ /* Search the chain where this instruction is (one of) the root. */
+ dest_op_info = insn_rr[INSN_UID (insn)].op_info;
+ dest_regno = REGNO (SET_DEST (PATTERN (insn)));
+ for (i = 0; i < dest_op_info->n_chains; i++)
+ {
+ /* The register tracked by this chain does not match the
+ destination register of insn. */
+ if (dest_op_info->heads[i]->regno != dest_regno)
+ continue;
+
+ head = dest_op_info->heads[i];
+ /* The chain was merged in another, find the new head. */
+ if (!head->first)
+ head = regrename_chain_from_id (head->id);
+
+ /* Search the chain element for this instruction and, if another
+ FMUL or FMADD/FMSUB instruction was already processed, note
+ the forest of its tree. */
+ forest = NULL;
+ for (chain = head->first; chain; chain = chain->next_use)
+ {
+ fma_node **fma_slot;
+
+ if (!is_fmul_fmac_insn (chain->insn, true))
+ continue;
+
+ /* This is a use, continue. */
+ if (chain->loc != &SET_DEST (PATTERN (chain->insn)))
+ continue;
+
+ if (chain->insn == insn)
+ break;
+
+ fma_slot = this->m_insn_fma_head_map->get (chain->insn);
+ if (fma_slot && (*fma_slot)->get_children ())
+ forest = (*fma_slot)->get_forest ();
+ }
+ if (chain)
+ break;
+ }
+
+ /* We didn't find a chain with a def for this instruction. */
+ gcc_assert (i < dest_op_info->n_chains);
+
+ this->analyze_fma_fmul_insn (forest, chain, head);
+ }
+ }
+ free (bb_dfs_preorder);
+
+ if (dump_file)
+ this->dfs (dump_forest_info, dump_tree_root_info, dump_tree_node_info,
+ false);
+}
+
+/* Perform the renaming of all chains with FMUL or FMADD/FMSUB involved with
+ the objective of keeping FPU pipeline balanced in term of instructions and
+ having FMADD/FMSUB with dependencies on previous FMUL or FMADD/FMSUB be
+ scheduled on the same pipeline. */
+
+void
+func_fma_steering::rename_fma_trees ()
+{
+ this->dfs (dispatch_forest, NULL, rename_fma_node, true);
+
+ if (dump_file && !this->m_fma_forests.empty ())
+ {
+ fprintf (dump_file, "Function %s has ", current_function_name ());
+ if (this->m_fpu_balance == 0)
+ fprintf (dump_file, "perfect balance of FMUL/FMA chains between the "
+ "two FPU pipelines\n");
+ else if (this->m_fpu_balance > 0)
+ fprintf (dump_file, "%d more FMUL/FMA chains scheduled on the second "
+ "FPU pipeline\n", this->m_fpu_balance);
+ else /* this->m_fpu_balance < 0 */
+ fprintf (dump_file, "%d more FMUL/FMA chains scheduled on the first "
+ "FPU pipeline\n", - this->m_fpu_balance);
+ }
+}
+
+/* Execute FMA steering pass. */
+
+void
+func_fma_steering::execute_fma_steering ()
+{
+ df_set_flags (DF_LR_RUN_DCE);
+ df_note_add_problem ();
+ df_analyze ();
+ df_set_flags (DF_DEFER_INSN_RESCAN);
+
+ regrename_init (true);
+ regrename_analyze (NULL);
+ this->analyze ();
+ this->rename_fma_trees ();
+ regrename_finish ();
+}
+
+const pass_data pass_data_fma_steering =
+{
+ RTL_PASS, /* type */
+ "fma_steering", /* name */
+ OPTGROUP_NONE, /* optinfo_flags */
+ TV_NONE, /* tv_id */
+ 0, /* properties_required */
+ 0, /* properties_provided */
+ 0, /* properties_destroyed */
+ 0, /* todo_flags_start */
+ TODO_df_finish, /* todo_flags_finish */
+};
+
+class pass_fma_steering : public rtl_opt_pass
+{
+public:
+ pass_fma_steering (gcc::context *ctxt)
+ : rtl_opt_pass (pass_data_fma_steering, ctxt)
+ {}
+
+ /* opt_pass methods: */
+ virtual bool gate (function *)
+ {
+ return optimize >= 2;
+ }
+
+ virtual unsigned int execute (function *)
+ {
+ func_fma_steering *fma_steering = new func_fma_steering;
+ fma_steering->execute_fma_steering ();
+ delete fma_steering;
+ return 0;
+ }
+
+}; // class pass_fma_steering
+
+/* Create a new fma steering pass instance. */
+
+static rtl_opt_pass *
+make_pass_fma_steering (gcc::context *ctxt)
+{
+ return new pass_fma_steering (ctxt);
+}
+
+/* Register the FMA steering pass to the pass manager. */
+
+void
+aarch64_register_fma_steering ()
+{
+ opt_pass *pass_fma_steering = make_pass_fma_steering (g);
+
+ static struct register_pass_info fma_steering_info
+ = { pass_fma_steering, "rnreg", 1, PASS_POS_INSERT_AFTER };
+
+ register_pass (&fma_steering_info);
+}
diff --git a/gcc/config/aarch64/cortex-a57-fma-steering.h b/gcc/config/aarch64/cortex-a57-fma-steering.h
new file mode 100644
index 00000000000..e8915acb423
--- /dev/null
+++ b/gcc/config/aarch64/cortex-a57-fma-steering.h
@@ -0,0 +1,22 @@
+/* This file contains declarations for the FMA steering optimization
+ pass for Cortex-A57.
+ Copyright (C) 2015 Free Software Foundation, Inc.
+ Contributed by ARM Ltd.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+void aarch64_register_fma_steering (void);
diff --git a/gcc/config/aarch64/t-aarch64 b/gcc/config/aarch64/t-aarch64
index e410bc919eb..0371203f0a3 100644
--- a/gcc/config/aarch64/t-aarch64
+++ b/gcc/config/aarch64/t-aarch64
@@ -48,6 +48,15 @@ aarch-common.o: $(srcdir)/config/arm/aarch-common.c $(CONFIG_H) $(SYSTEM_H) \
$(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
$(srcdir)/config/arm/aarch-common.c
+cortex-a57-fma-steering.o: $(srcdir)/config/aarch64/cortex-a57-fma-steering.c \
+ $(CONFIG_H) $(SYSTEM_H) $(TM_H) $(REGS_H) insn-config.h $(RTL_BASE_H) \
+ dominance.h cfg.h cfganal.h $(BASIC_BLOCK_H) $(INSN_ATTR_H) $(RECOG_H) \
+ output.h hash-map.h $(DF_H) $(OBSTACK_H) $(TARGET_H) $(RTL_H) \
+ $(CONTEXT_H) $(TREE_PASS_H) regrename.h \
+ $(srcdir)/config/aarch64/cortex-a57-fma-steering.h
+ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
+ $(srcdir)/config/aarch64/cortex-a57-fma-steering.c
+
comma=,
MULTILIB_OPTIONS = $(subst $(comma),/, $(patsubst %, mabi=%, $(subst $(comma),$(comma)mabi=,$(TM_MULTILIB_CONFIG))))
MULTILIB_DIRNAMES = $(subst $(comma), ,$(TM_MULTILIB_CONFIG))