summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorcollison <collison@138bc75d-0d04-0410-961f-82ee72b054a4>2015-04-07 21:05:46 +0000
committercollison <collison@138bc75d-0d04-0410-961f-82ee72b054a4>2015-04-07 21:05:46 +0000
commitf9999e623e33810e9823377b230a7115c3a837a3 (patch)
tree6fb3e26f167f83b4d323a3b40e07658bf12b8841
parent767f045a556fc1ea52464612f9fecbe356d37ae1 (diff)
downloadgcc-f9999e623e33810e9823377b230a7115c3a837a3.tar.gz
2015-04-07 Michael Collison <michael.collison@linaro.org>
Backport from trunk r219679.
2015-01-15 Richard Earnshaw <rearnsha@arm.com>
* arm.c (arm_xgene_tune): Add default initializer for instruction fusion.

2015-04-07 Michael Collison <michael.collison@linaro.org>
Backport from trunk r219661.
2015-01-15 Philipp Tomsich <philipp.tomsich@theobroma-systems.com>
* config/arm/arm.md (generic_sched): Specify xgene1 in 'no' list. Include xgene1.md.
* config/arm/arm.c (arm_issue_rate): Specify 4 for xgene1.
* config/arm/arm-cores.def (xgene1): New entry.
* config/arm/arm-tables.opt: Regenerate.
* config/arm/arm-tune.md: Regenerate.
* config/arm/bpabi.h (BE8_LINK_SPEC): Specify mcpu=xgene1.

2015-04-07 Michael Collison <michael.collison@linaro.org>
Backport from trunk r219657.
2015-01-15 Philipp Tomsich <ptomsich@theobroma-systems.com>
* config/aarch64/aarch64.md: Include xgene1.md.
* config/aarch64/xgene1.md: New file.

2015-04-07 Michael Collison <michael.collison@linaro.org>
Backport from trunk r219656.
2015-01-15 Philipp Tomsich <philipp.tomsich@theobroma-systems.com>
* config/aarch64/aarch64-cores.def (xgene1): Update/add the xgene1 (APM XGene-1) core definition.
* gcc/config/aarch64/aarch64.c: Add cost tables for APM XGene-1
* config/arm/aarch-cost-tables.h: Add cost tables for APM XGene-1
* doc/invoke.texi: Document -mcpu=xgene1.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/linaro/gcc-4_9-branch@221911 138bc75d-0d04-0410-961f-82ee72b054a4
-rw-r--r--gcc/ChangeLog.linaro40
-rw-r--r--gcc/config/aarch64/aarch64-cores.def1
-rw-r--r--gcc/config/aarch64/aarch64-tune.md2
-rw-r--r--gcc/config/aarch64/aarch64.c62
-rw-r--r--gcc/config/aarch64/aarch64.md1
-rw-r--r--gcc/config/arm/aarch-cost-tables.h101
-rw-r--r--gcc/config/arm/arm-cores.def1
-rw-r--r--gcc/config/arm/arm-tables.opt3
-rw-r--r--gcc/config/arm/arm-tune.md2
-rw-r--r--gcc/config/arm/arm.c22
-rw-r--r--gcc/config/arm/arm.md11
-rw-r--r--gcc/config/arm/bpabi.h2
-rw-r--r--gcc/config/arm/t-arm1
-rw-r--r--gcc/config/arm/xgene1.md531
-rw-r--r--gcc/doc/invoke.texi6
15 files changed, 780 insertions, 6 deletions
diff --git a/gcc/ChangeLog.linaro b/gcc/ChangeLog.linaro
index 499068a65e9..a05a96534ff 100644
--- a/gcc/ChangeLog.linaro
+++ b/gcc/ChangeLog.linaro
@@ -1,3 +1,43 @@
+2015-04-07 Michael Collison <michael.collison@linaro.org>
+
+ Backport from trunk r219679.
+ 2015-01-15 Richard Earnshaw <rearnsha@arm.com>
+
+ * config/arm/arm.c (arm_xgene1_tune): Add default initializer for
+ instruction fusion.
+
+2015-04-07 Michael Collison <michael.collison@linaro.org>
+
+ Backport from trunk r219661.
+ 2015-01-15 Philipp Tomsich <philipp.tomsich@theobroma-systems.com>
+
+ * config/arm/arm.md (generic_sched): Specify xgene1 in 'no' list.
+ Include xgene1.md.
+ * config/arm/arm.c (arm_issue_rate): Specify 4 for xgene1.
+ * config/arm/arm-cores.def (xgene1): New entry.
+ * config/arm/arm-tables.opt: Regenerate.
+ * config/arm/arm-tune.md: Regenerate.
+ * config/arm/bpabi.h (BE8_LINK_SPEC): Specify mcpu=xgene1.
+
+2015-04-07 Michael Collison <michael.collison@linaro.org>
+
+ Backport from trunk r219657.
+ 2015-01-15 Philipp Tomsich <ptomsich@theobroma-systems.com>
+
+ * config/aarch64/aarch64.md: Include xgene1.md.
+ * config/arm/xgene1.md: New file.
+
+2015-04-07 Michael Collison <michael.collison@linaro.org>
+
+ Backport from trunk r219656.
+ 2015-01-15 Philipp Tomsich <philipp.tomsich@theobroma-systems.com>
+
+ * config/aarch64/aarch64-cores.def (xgene1): Update/add the
+ xgene1 (APM XGene-1) core definition.
+ * config/aarch64/aarch64.c: Add cost tables for APM XGene-1.
+ * config/arm/aarch-cost-tables.h: Add cost tables for APM XGene-1.
+ * doc/invoke.texi: Document -mcpu=xgene1.
+
2015-04-07 Yvan Roux <yvan.roux@linaro.org>
Backport from trunk r217062, r217646, r218658.
diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def
index 110b41f0190..2b694e67029 100644
--- a/gcc/config/aarch64/aarch64-cores.def
+++ b/gcc/config/aarch64/aarch64-cores.def
@@ -37,6 +37,7 @@
AARCH64_CORE("cortex-a53", cortexa53, cortexa53, 8, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa53)
AARCH64_CORE("cortex-a57", cortexa15, cortexa15, 8, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57)
AARCH64_CORE("thunderx", thunderx, thunderx, 8, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx)
+AARCH64_CORE("xgene1", xgene1, xgene1, 8, AARCH64_FL_FOR_ARCH8, xgene1)
/* V8 big.LITTLE implementations. */
diff --git a/gcc/config/aarch64/aarch64-tune.md b/gcc/config/aarch64/aarch64-tune.md
index c717ea848bc..640908234ae 100644
--- a/gcc/config/aarch64/aarch64-tune.md
+++ b/gcc/config/aarch64/aarch64-tune.md
@@ -1,5 +1,5 @@
;; -*- buffer-read-only: t -*-
;; Generated automatically by gentune.sh from aarch64-cores.def
(define_attr "tune"
- "cortexa53,cortexa15,thunderx,cortexa57cortexa53"
+ "cortexa53,cortexa15,thunderx,xgene1,cortexa57cortexa53"
(const (symbol_ref "((enum attr_tune) aarch64_tune)")))
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index ae2f1aabfc2..ce0ac180a8e 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -215,6 +215,27 @@ static const struct cpu_addrcost_table cortexa57_addrcost_table =
#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
__extension__
#endif
+/* Address cost table for the xgene1 core; xgene1_tunings points at it.
+   The first initializer fills addr_scale_costs (hi, si, di, ti); the
+   remaining entries set pre_modify, post_modify, register_offset,
+   register_extend and imm_offset.  */
+static const struct cpu_addrcost_table xgene1_addrcost_table =
+{
+#if HAVE_DESIGNATED_INITIALIZERS
+ .addr_scale_costs =
+#endif
+ {
+ NAMED_PARAM (hi, 1),
+ NAMED_PARAM (si, 0),
+ NAMED_PARAM (di, 0),
+ NAMED_PARAM (ti, 1),
+ },
+ NAMED_PARAM (pre_modify, 1),
+ NAMED_PARAM (post_modify, 0),
+ NAMED_PARAM (register_offset, 0),
+ NAMED_PARAM (register_extend, 1),
+ NAMED_PARAM (imm_offset, 0),
+};
+
+#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
+__extension__
+#endif
static const struct cpu_regmove_cost generic_regmove_cost =
{
NAMED_PARAM (GP2GP, 1),
@@ -253,6 +274,16 @@ static const struct cpu_regmove_cost thunderx_regmove_cost =
NAMED_PARAM (FP2FP, 4)
};
+/* Register move costs for the xgene1 core; xgene1_tunings points at
+   this table.  Guarded with __extension__ like the other NAMED_PARAM
+   tables so designated initializers do not trigger pedantic
+   diagnostics.  */
+#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
+__extension__
+#endif
+static const struct cpu_regmove_cost xgene1_regmove_cost =
+{
+ NAMED_PARAM (GP2GP, 1),
+ /* Avoid the use of slow int<->fp moves for spilling by setting
+ their cost higher than memmov_cost. */
+ NAMED_PARAM (GP2FP, 8),
+ NAMED_PARAM (FP2GP, 8),
+ NAMED_PARAM (FP2FP, 2)
+};
+
/* Generic costs for vector insn classes. */
#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
__extension__
@@ -293,6 +324,26 @@ static const struct cpu_vector_cost cortexa57_vector_cost =
NAMED_PARAM (cond_not_taken_branch_cost, 1)
};
+/* APM X-Gene 1 (xgene1) costs for vector insn classes; xgene1_tunings
+   points at this table.  */
+#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
+__extension__
+#endif
+static const struct cpu_vector_cost xgene1_vector_cost =
+{
+ NAMED_PARAM (scalar_stmt_cost, 1),
+ NAMED_PARAM (scalar_load_cost, 5),
+ NAMED_PARAM (scalar_store_cost, 1),
+ NAMED_PARAM (vec_stmt_cost, 2),
+ NAMED_PARAM (vec_to_scalar_cost, 4),
+ NAMED_PARAM (scalar_to_vec_cost, 4),
+ NAMED_PARAM (vec_align_load_cost, 10),
+ NAMED_PARAM (vec_unalign_load_cost, 10),
+ NAMED_PARAM (vec_unalign_store_cost, 2),
+ NAMED_PARAM (vec_store_cost, 2),
+ NAMED_PARAM (cond_taken_branch_cost, 2),
+ NAMED_PARAM (cond_not_taken_branch_cost, 1)
+};
+
#define AARCH64_FUSE_NOTHING (0)
#define AARCH64_FUSE_MOV_MOVK (1 << 0)
#define AARCH64_FUSE_ADRP_ADD (1 << 1)
@@ -360,6 +411,17 @@ static const struct tune_params thunderx_tunings =
1 /* vec_reassoc_width. */
};
+/* Tuning parameters for the xgene1 core: the four xgene1 cost tables
+   followed by memmov_cost, issue_rate and fuseable_ops.
+   NOTE(review): thunderx_tunings just above ends with a
+   vec_reassoc_width entry that has no counterpart here, so any
+   tune_params fields after fuseable_ops are implicitly zero for
+   xgene1 -- confirm against the struct definition.  */
+static const struct tune_params xgene1_tunings =
+{
+ &xgene1_extra_costs,
+ &xgene1_addrcost_table,
+ &xgene1_regmove_cost,
+ &xgene1_vector_cost,
+ NAMED_PARAM (memmov_cost, 6),
+ NAMED_PARAM (issue_rate, 4),
+ NAMED_PARAM (fuseable_ops, AARCH64_FUSE_NOTHING)
+};
+
/* A processor implementing AArch64. */
struct processor
{
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 9788016f02d..0ab9c38e425 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -176,6 +176,7 @@
(include "../arm/cortex-a53.md")
(include "../arm/cortex-a15.md")
(include "thunderx.md")
+(include "../arm/xgene1.md")
;; -------------------------------------------------------------------
;; Jumps and other miscellaneous insns
diff --git a/gcc/config/arm/aarch-cost-tables.h b/gcc/config/arm/aarch-cost-tables.h
index adf8708fccd..7f740063300 100644
--- a/gcc/config/arm/aarch-cost-tables.h
+++ b/gcc/config/arm/aarch-cost-tables.h
@@ -325,4 +325,105 @@ const struct cpu_cost_table cortexa57_extra_costs =
}
};
+/* Extra instruction costs for the APM X-Gene 1 (xgene1) core, grouped
+   as ALU, MULT (SImode then DImode), LD/ST, FP (SFmode then DFmode)
+   and Vector.  The initializers are positional, so their order must
+   match struct cpu_cost_table.  Referenced by xgene1_tunings in
+   aarch64.c and arm_xgene1_tune in arm.c.  */
+const struct cpu_cost_table xgene1_extra_costs =
+{
+ /* ALU */
+ {
+ 0, /* arith. */
+ 0, /* logical. */
+ 0, /* shift. */
+ COSTS_N_INSNS (1), /* shift_reg. */
+ COSTS_N_INSNS (1), /* arith_shift. */
+ COSTS_N_INSNS (1), /* arith_shift_reg. */
+ COSTS_N_INSNS (1), /* log_shift. */
+ COSTS_N_INSNS (1), /* log_shift_reg. */
+ COSTS_N_INSNS (1), /* extend. */
+ 0, /* extend_arithm. */
+ COSTS_N_INSNS (1), /* bfi. */
+ COSTS_N_INSNS (1), /* bfx. */
+ 0, /* clz. */
+ COSTS_N_INSNS (1), /* rev. */
+ 0, /* non_exec. */
+ true /* non_exec_costs_exec. */
+ },
+ {
+ /* MULT SImode */
+ {
+ COSTS_N_INSNS (4), /* simple. */
+ COSTS_N_INSNS (4), /* flag_setting. */
+ COSTS_N_INSNS (4), /* extend. */
+ COSTS_N_INSNS (4), /* add. */
+ COSTS_N_INSNS (4), /* extend_add. */
+ COSTS_N_INSNS (20) /* idiv. */
+ },
+ /* MULT DImode */
+ {
+ COSTS_N_INSNS (5), /* simple. */
+ 0, /* flag_setting (N/A). */
+ COSTS_N_INSNS (5), /* extend. */
+ COSTS_N_INSNS (5), /* add. */
+ COSTS_N_INSNS (5), /* extend_add. */
+ COSTS_N_INSNS (21) /* idiv. */
+ }
+ },
+ /* LD/ST */
+ {
+ COSTS_N_INSNS (5), /* load. */
+ COSTS_N_INSNS (6), /* load_sign_extend. */
+ COSTS_N_INSNS (5), /* ldrd. */
+ COSTS_N_INSNS (5), /* ldm_1st. */
+ 1, /* ldm_regs_per_insn_1st. */
+ 1, /* ldm_regs_per_insn_subsequent. */
+ COSTS_N_INSNS (10), /* loadf. */
+ COSTS_N_INSNS (10), /* loadd. */
+ COSTS_N_INSNS (5), /* load_unaligned. */
+ 0, /* store. */
+ 0, /* strd. */
+ 0, /* stm_1st. */
+ 1, /* stm_regs_per_insn_1st. */
+ 1, /* stm_regs_per_insn_subsequent. */
+ 0, /* storef. */
+ 0, /* stored. */
+ 0, /* store_unaligned. */
+ },
+ {
+ /* FP SFmode */
+ {
+ COSTS_N_INSNS (23), /* div. */
+ COSTS_N_INSNS (5), /* mult. */
+ COSTS_N_INSNS (5), /* mult_addsub. */
+ COSTS_N_INSNS (5), /* fma. */
+ COSTS_N_INSNS (5), /* addsub. */
+ COSTS_N_INSNS (2), /* fpconst. */
+ COSTS_N_INSNS (3), /* neg. */
+ COSTS_N_INSNS (2), /* compare. */
+ COSTS_N_INSNS (6), /* widen. */
+ COSTS_N_INSNS (6), /* narrow. */
+ COSTS_N_INSNS (4), /* toint. */
+ COSTS_N_INSNS (4), /* fromint. */
+ COSTS_N_INSNS (4) /* roundint. */
+ },
+ /* FP DFmode */
+ {
+ COSTS_N_INSNS (29), /* div. */
+ COSTS_N_INSNS (5), /* mult. */
+ COSTS_N_INSNS (5), /* mult_addsub. */
+ COSTS_N_INSNS (5), /* fma. */
+ COSTS_N_INSNS (5), /* addsub. */
+ COSTS_N_INSNS (3), /* fpconst. */
+ COSTS_N_INSNS (3), /* neg. */
+ COSTS_N_INSNS (2), /* compare. */
+ COSTS_N_INSNS (6), /* widen. */
+ COSTS_N_INSNS (6), /* narrow. */
+ COSTS_N_INSNS (4), /* toint. */
+ COSTS_N_INSNS (4), /* fromint. */
+ COSTS_N_INSNS (4) /* roundint. */
+ }
+ },
+ /* Vector */
+ {
+ COSTS_N_INSNS (2) /* alu. */
+ }
+};
+
#endif /* GCC_AARCH_COST_TABLES_H */
diff --git a/gcc/config/arm/arm-cores.def b/gcc/config/arm/arm-cores.def
index d5067b0e9b9..910431ab839 100644
--- a/gcc/config/arm/arm-cores.def
+++ b/gcc/config/arm/arm-cores.def
@@ -165,6 +165,7 @@ ARM_CORE("cortex-a15.cortex-a7", cortexa15cortexa7, cortexa7, 7A, FL_LDSCHED |
/* V8 Architecture Processors */
ARM_CORE("cortex-a53", cortexa53, cortexa53, 8A, FL_LDSCHED | FL_CRC32, cortex_a53)
ARM_CORE("cortex-a57", cortexa57, cortexa15, 8A, FL_LDSCHED | FL_CRC32, cortex_a57)
+ARM_CORE("xgene1", xgene1, xgene1, 8A, FL_LDSCHED, xgene1)
/* V8 big.LITTLE implementations */
ARM_CORE("cortex-a57.cortex-a53", cortexa57cortexa53, cortexa53, 8A, FL_LDSCHED | FL_CRC32, cortex_a57)
diff --git a/gcc/config/arm/arm-tables.opt b/gcc/config/arm/arm-tables.opt
index 9c7e944bfcb..7efd2b60941 100644
--- a/gcc/config/arm/arm-tables.opt
+++ b/gcc/config/arm/arm-tables.opt
@@ -304,6 +304,9 @@ EnumValue
Enum(processor_type) String(cortex-a57) Value(cortexa57)
EnumValue
+Enum(processor_type) String(xgene1) Value(xgene1)
+
+EnumValue
Enum(processor_type) String(cortex-a57.cortex-a53) Value(cortexa57cortexa53)
Enum
diff --git a/gcc/config/arm/arm-tune.md b/gcc/config/arm/arm-tune.md
index 84355d69e46..97cca44b951 100644
--- a/gcc/config/arm/arm-tune.md
+++ b/gcc/config/arm/arm-tune.md
@@ -32,5 +32,5 @@
cortexr5,cortexr7,cortexm7,
cortexm4,cortexm3,marvell_pj4,
cortexa15cortexa7,cortexa53,cortexa57,
- cortexa57cortexa53"
+ xgene1,cortexa57cortexa53"
(const (symbol_ref "((enum attr_tune) arm_tune)")))
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 1c741ed63c2..b02f56d6c7e 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -1944,6 +1944,25 @@ const struct tune_params arm_cortex_a57_tune =
ARM_FUSE_MOVW_MOVT /* Fuseable pairs of instructions. */
};
+/* Tuning parameters for the xgene1 core on 32-bit ARM.  Uses
+   arm_9e_rtx_costs with the xgene1_extra_costs table, marks prefetching
+   as not beneficial and disables instruction fusion (ARM_FUSE_NOTHING).
+   The initializers are positional, so their order must match
+   struct tune_params.  */
+const struct tune_params arm_xgene1_tune =
+{
+ arm_9e_rtx_costs,
+ &xgene1_extra_costs,
+ NULL, /* Scheduler cost adjustment. */
+ 1, /* Constant limit. */
+ 2, /* Max cond insns. */
+ ARM_PREFETCH_NOT_BENEFICIAL,
+ false, /* Prefer constant pool. */
+ arm_default_branch_cost,
+ true, /* Prefer LDRD/STRD. */
+ {true, true}, /* Prefer non short circuit. */
+ &arm_default_vec_cost, /* Vectorizer costs. */
+ false, /* Prefer Neon for 64-bits bitops. */
+ true, true, /* Prefer 32-bit encodings. */
+ ARM_SCHED_AUTOPREF_OFF, /* Sched L2 autopref. */
+ ARM_FUSE_NOTHING /* Fuseable pairs of instructions. */
+};
+
/* Branches can be dual-issued on Cortex-A5, so conditional execution is
less appealing. Set max_insns_skipped to a low value. */
@@ -29929,6 +29948,9 @@ arm_issue_rate (void)
{
switch (arm_tune)
{
+ case xgene1:
+ return 4;
+
case cortexa15:
case cortexa57:
return 3;
diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index d931ed45d76..829421cb07c 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -109,6 +109,11 @@
;; given instruction does not shift one of its input operands.
(define_attr "shift" "" (const_int 0))
+;; [For compatibility with AArch64 in pipeline models]
+;; Attribute that specifies whether or not the instruction touches fp
+;; registers.  Defaults to "no"; the xgene1 pipeline description tests
+;; it to give load1 insns with fp=yes their own reservation.
+(define_attr "fp" "no,yes" (const_string "no"))
+
; Floating Point Unit. If we only have floating point emulation, then there
; is no point in scheduling the floating point insns. (Well, for best
; performance we should try and group them together).
@@ -373,7 +378,8 @@
arm926ejs,arm1020e,arm1026ejs,arm1136js,\
arm1136jfs,cortexa5,cortexa7,cortexa8,\
cortexa9,cortexa12,cortexa15,cortexa53,\
- cortexm4,cortexm7,marvell_pj4")
+ cortexm4,cortexm7,marvell_pj4,
+ xgene1")
(eq_attr "tune_cortexr4" "yes"))
(const_string "no")
(const_string "yes"))))
@@ -383,7 +389,7 @@
(and (eq_attr "fpu" "vfp")
(eq_attr "tune" "!arm1020e,arm1022e,cortexa5,cortexa7,\
cortexa8,cortexa9,cortexa53,cortexm4,\
- cortexm7,marvell_pj4")
+ cortexm7,marvell_pj4,xgene1")
(eq_attr "tune_cortexr4" "no"))
(const_string "yes")
(const_string "no"))))
@@ -412,6 +418,7 @@
(include "cortex-m4-fpu.md")
(include "vfp11.md")
(include "marvell-pj4.md")
+(include "xgene1.md")
;;---------------------------------------------------------------------------
diff --git a/gcc/config/arm/bpabi.h b/gcc/config/arm/bpabi.h
index f99e1af1e1a..574500eb387 100644
--- a/gcc/config/arm/bpabi.h
+++ b/gcc/config/arm/bpabi.h
@@ -70,6 +70,7 @@
|mcpu=cortex-a53 \
|mcpu=cortex-a57 \
|mcpu=cortex-a57.cortex-a53 \
+ |mcpu=xgene1 \
|mcpu=cortex-m1.small-multiply \
|mcpu=cortex-m0.small-multiply \
|mcpu=cortex-m0plus.small-multiply \
@@ -90,6 +91,7 @@
|mcpu=cortex-a53 \
|mcpu=cortex-a57 \
|mcpu=cortex-a57.cortex-a53 \
+ |mcpu=xgene1 \
|mcpu=cortex-m1.small-multiply \
|mcpu=cortex-m0.small-multiply \
|mcpu=cortex-m0plus.small-multiply \
diff --git a/gcc/config/arm/t-arm b/gcc/config/arm/t-arm
index 2ad7bf3ec17..52d0c82a6cd 100644
--- a/gcc/config/arm/t-arm
+++ b/gcc/config/arm/t-arm
@@ -40,6 +40,7 @@ MD_INCLUDES= $(srcdir)/config/arm/arm1020e.md \
$(srcdir)/config/arm/cortex-a9.md \
$(srcdir)/config/arm/cortex-a9-neon.md \
$(srcdir)/config/arm/cortex-a53.md \
+ $(srcdir)/config/arm/xgene1.md \
$(srcdir)/config/arm/cortex-m4-fpu.md \
$(srcdir)/config/arm/cortex-m4.md \
$(srcdir)/config/arm/cortex-r4f.md \
diff --git a/gcc/config/arm/xgene1.md b/gcc/config/arm/xgene1.md
new file mode 100644
index 00000000000..1fba538783b
--- /dev/null
+++ b/gcc/config/arm/xgene1.md
@@ -0,0 +1,531 @@
+;; Machine description for AppliedMicro xgene1 core.
+;; Copyright (C) 2012-2015 Free Software Foundation, Inc.
+;; Contributed by Theobroma Systems Design und Consulting GmbH.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but
+;; WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;; General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; Pipeline description for the xgene1 micro-architecture
+
+;; All units of the core belong to a single automaton.
+(define_automaton "xgene1")
+
+;; Four decode output slots; the decode reservations defined after the
+;; units claim one, two or all four of them.
+(define_cpu_unit "xgene1_decode_out0" "xgene1")
+(define_cpu_unit "xgene1_decode_out1" "xgene1")
+(define_cpu_unit "xgene1_decode_out2" "xgene1")
+(define_cpu_unit "xgene1_decode_out3" "xgene1")
+
+;; Additional units that individual insn reservations in this file hold
+;; after decode.
+(define_cpu_unit "xgene1_divide" "xgene1")
+(define_cpu_unit "xgene1_fp_divide" "xgene1")
+(define_cpu_unit "xgene1_fsu" "xgene1")
+(define_cpu_unit "xgene1_fcmp" "xgene1")
+
+;; Any one of the four decode slots.
+(define_reservation "xgene1_decode1op"
+ "( xgene1_decode_out0 )
+ |( xgene1_decode_out1 )
+ |( xgene1_decode_out2 )
+ |( xgene1_decode_out3 )"
+)
+;; Any two of the four decode slots in the same cycle (all six pairs
+;; are listed).
+(define_reservation "xgene1_decode2op"
+ "( xgene1_decode_out0 + xgene1_decode_out1 )
+ |( xgene1_decode_out0 + xgene1_decode_out2 )
+ |( xgene1_decode_out0 + xgene1_decode_out3 )
+ |( xgene1_decode_out1 + xgene1_decode_out2 )
+ |( xgene1_decode_out1 + xgene1_decode_out3 )
+ |( xgene1_decode_out2 + xgene1_decode_out3 )"
+)
+;; All four decode slots in the same cycle, so no other insn can be
+;; decoded alongside.
+(define_reservation "xgene1_decodeIsolated"
+ "( xgene1_decode_out0 + xgene1_decode_out1 + xgene1_decode_out2 + xgene1_decode_out3 )"
+)
+
+(define_insn_reservation "xgene1_branch" 1
+ (and (eq_attr "tune" "xgene1")
+ (eq_attr "type" "branch"))
+ "xgene1_decode1op")
+
+(define_insn_reservation "xgene1_nop" 1
+ (and (eq_attr "tune" "xgene1")
+ (eq_attr "type" "no_insn"))
+ "xgene1_decode1op")
+
+(define_insn_reservation "xgene1_call" 1
+ (and (eq_attr "tune" "xgene1")
+ (eq_attr "type" "call"))
+ "xgene1_decode2op")
+
+(define_insn_reservation "xgene1_f_load" 10
+ (and (eq_attr "tune" "xgene1")
+ (eq_attr "type" "f_loadd,f_loads"))
+ "xgene1_decode2op")
+
+(define_insn_reservation "xgene1_f_store" 4
+ (and (eq_attr "tune" "xgene1")
+ (eq_attr "type" "f_stored,f_stores"))
+ "xgene1_decode2op")
+
+(define_insn_reservation "xgene1_fmov" 2
+ (and (eq_attr "tune" "xgene1")
+ (eq_attr "type" "fmov,fconsts,fconstd"))
+ "xgene1_decode1op")
+
+(define_insn_reservation "xgene1_f_mcr" 10
+ (and (eq_attr "tune" "xgene1")
+ (eq_attr "type" "f_mcr"))
+ "xgene1_decodeIsolated")
+
+(define_insn_reservation "xgene1_f_mrc" 4
+ (and (eq_attr "tune" "xgene1")
+ (eq_attr "type" "f_mrc"))
+ "xgene1_decode2op")
+
+(define_insn_reservation "xgene1_load_pair" 6
+ (and (eq_attr "tune" "xgene1")
+ (eq_attr "type" "load2"))
+ "xgene1_decodeIsolated")
+
+(define_insn_reservation "xgene1_store_pair" 2
+ (and (eq_attr "tune" "xgene1")
+ (eq_attr "type" "store2"))
+ "xgene1_decodeIsolated")
+
+;; load1 insns whose "fp" attribute is "yes" get latency 10.
+;; NOTE(review): xgene1_load1, defined next, does not test "fp" and so
+;; also matches these insns; this presumably relies on the earlier
+;; reservation taking precedence -- confirm.
+(define_insn_reservation "xgene1_fp_load1" 10
+ (and (eq_attr "tune" "xgene1")
+ (eq_attr "type" "load1")
+ (eq_attr "fp" "yes"))
+ "xgene1_decode1op")
+
+(define_insn_reservation "xgene1_load1" 5
+ (and (eq_attr "tune" "xgene1")
+ (eq_attr "type" "load1"))
+ "xgene1_decode1op")
+
+(define_insn_reservation "xgene1_store1" 2
+ (and (eq_attr "tune" "xgene1")
+ (eq_attr "type" "store1"))
+ "xgene1_decode2op")
+
+(define_insn_reservation "xgene1_move" 1
+ (and (eq_attr "tune" "xgene1")
+ (eq_attr "type" "mov_reg,mov_imm,mrs"))
+ "xgene1_decode1op")
+
+;; Single-cycle integer ALU and logical operations that do not set the
+;; flags.  mov_reg and mov_imm are not listed here because xgene1_move
+;; already reserves them with the same latency and decode reservation.
+(define_insn_reservation "xgene1_alu" 1
+ (and (eq_attr "tune" "xgene1")
+ (eq_attr "type" "alu_imm,alu_reg,alu_shift_imm,\
+ alu_ext,adc_reg,csel,logic_imm,\
+ logic_reg,logic_shift_imm,clz,\
+ rbit,shift_reg,adr,extend"))
+ "xgene1_decode1op")
+
+(define_insn_reservation "xgene1_simd" 1
+ (and (eq_attr "tune" "xgene1")
+ (eq_attr "type" "rev"))
+ "xgene1_decode1op")
+
+;; Single-cycle flag-setting ALU and logical operations.  The register
+;; form is "alus_reg"; plain "alu_reg" is already covered by xgene1_alu,
+;; and listing it here left alus_reg insns without an xgene1
+;; reservation.
+(define_insn_reservation "xgene1_alus" 1
+ (and (eq_attr "tune" "xgene1")
+ (eq_attr "type" "alus_imm,alus_reg,alus_shift_imm,\
+ alus_ext,logics_imm,logics_reg,\
+ logics_shift_imm"))
+ "xgene1_decode1op")
+
+(define_insn_reservation "xgene1_mul" 6
+ (and (eq_attr "tune" "xgene1")
+ (eq_attr "type" "mul,mla,smull,umull,smlal,umlal"))
+ "xgene1_decode2op")
+
+(define_insn_reservation "xgene1_div" 34
+ (and (eq_attr "tune" "xgene1")
+ (eq_attr "type" "sdiv,udiv"))
+ "xgene1_decode1op,xgene1_divide*7")
+
+(define_insn_reservation "xgene1_fcmp" 10
+ (and (eq_attr "tune" "xgene1")
+ (eq_attr "type" "fcmpd,fcmps"))
+ "xgene1_decode1op,xgene1_fsu+xgene1_fcmp*3")
+
+(define_insn_reservation "xgene1_fcsel" 3
+ (and (eq_attr "tune" "xgene1")
+ (eq_attr "type" "fcsel"))
+ "xgene1_decode1op,xgene1_fsu")
+
+(define_insn_reservation "xgene1_bfm" 2
+ (and (eq_attr "tune" "xgene1")
+ (eq_attr "type" "bfm"))
+ "xgene1_decode1op,xgene1_fsu")
+
+(define_insn_reservation "xgene1_f_rint" 5
+ (and (eq_attr "tune" "xgene1")
+ (eq_attr "type" "f_rintd,f_rints"))
+ "xgene1_decode1op,xgene1_fsu")
+
+(define_insn_reservation "xgene1_f_cvt" 3
+ (and (eq_attr "tune" "xgene1")
+ (eq_attr "type" "f_cvt"))
+ "xgene1_decode1op,xgene1_fsu")
+
+(define_insn_reservation "xgene1_f_cvtf2i" 11
+ (and (eq_attr "tune" "xgene1")
+ (eq_attr "type" "f_cvtf2i"))
+ "xgene1_decodeIsolated,xgene1_fsu")
+
+(define_insn_reservation "xgene1_f_cvti2f" 14
+ (and (eq_attr "tune" "xgene1")
+ (eq_attr "type" "f_cvti2f"))
+ "xgene1_decodeIsolated,xgene1_fsu")
+
+(define_insn_reservation "xgene1_f_add" 5
+ (and (eq_attr "tune" "xgene1")
+ (eq_attr "type" "faddd,fadds,fmuld,fmuls"))
+ "xgene1_decode1op,xgene1_fsu")
+
+(define_insn_reservation "xgene1_f_divs" 22
+ (and (eq_attr "tune" "xgene1")
+ (eq_attr "type" "fdivs,fsqrts"))
+ "xgene1_decode1op,(xgene1_fp_divide+xgene1_fsu)*8,xgene1_fp_divide*14")
+
+(define_insn_reservation "xgene1_f_divd" 28
+ (and (eq_attr "tune" "xgene1")
+ (eq_attr "type" "fdivd"))
+ "xgene1_decode1op,(xgene1_fp_divide+xgene1_fsu)*11,xgene1_fp_divide*17")
+
+(define_insn_reservation "xgene1_f_sqrtd" 28
+ (and (eq_attr "tune" "xgene1")
+ (eq_attr "type" "fsqrtd"))
+ "xgene1_decode1op,(xgene1_fp_divide+xgene1_fsu)*17,xgene1_fp_divide*11")
+
+(define_insn_reservation "xgene1_f_arith" 2
+ (and (eq_attr "tune" "xgene1")
+ (eq_attr "type" "ffarithd,ffariths"))
+ "xgene1_decode1op,xgene1_fsu")
+
+(define_insn_reservation "xgene1_f_select" 3
+ (and (eq_attr "tune" "xgene1")
+ (eq_attr "type" "f_minmaxd,f_minmaxs"))
+ "xgene1_decode1op,xgene1_fsu")
+
+(define_insn_reservation "xgene1_neon_dup" 3
+ (and (eq_attr "tune" "xgene1")
+ (eq_attr "type" "neon_dup,neon_dup_q"))
+ "xgene1_decode1op,xgene1_fsu")
+
+(define_insn_reservation "xgene1_neon_load1" 11
+ (and (eq_attr "tune" "xgene1")
+ (eq_attr "type" "neon_load1_1reg, neon_load1_1reg_q"))
+ "xgene1_decode2op,xgene1_fsu")
+
+(define_insn_reservation "xgene1_neon_store1" 5
+ (and (eq_attr "tune" "xgene1")
+ (eq_attr "type" "neon_store1_1reg, neon_store1_1reg_q"))
+ "xgene1_decode2op,xgene1_fsu")
+
+(define_insn_reservation "xgene1_neon_logic" 2
+ (and (eq_attr "tune" "xgene1")
+ (eq_attr "type" "neon_logic,\
+ neon_logic_q,\
+ neon_bsl,\
+ neon_bsl_q,\
+ neon_move,\
+ neon_move_q,\
+ "))
+ "xgene1_decode1op,xgene1_fsu")
+
+(define_insn_reservation "xgene1_neon_umov" 7
+ (and (eq_attr "tune" "xgene1")
+ (eq_attr "type" "neon_to_gp, neon_to_gp_q"))
+ "xgene1_decodeIsolated")
+
+(define_insn_reservation "xgene1_neon_ins" 14
+ (and (eq_attr "tune" "xgene1")
+ (eq_attr "type" "neon_from_gp,\
+ neon_from_gp_q,\
+ neon_ins,\
+ neon_ins_q,\
+ "))
+ "xgene1_decodeIsolated,xgene1_fsu")
+
+(define_insn_reservation "xgene1_neon_shift" 3
+ (and (eq_attr "tune" "xgene1")
+ (eq_attr "type" "neon_shift_imm,\
+ neon_shift_imm_q,\
+ neon_shift_reg,\
+ neon_shift_reg_q,\
+ neon_shift_imm_long,\
+ neon_sat_shift_imm,\
+ neon_sat_shift_imm_q,\
+ neon_sat_shift_imm_narrow_q,\
+ neon_sat_shift_reg,\
+ neon_sat_shift_reg_q,\
+ neon_shift_imm_narrow_q,\
+ "))
+ "xgene1_decode1op,xgene1_fsu")
+
+(define_insn_reservation "xgene1_neon_arith" 3
+ (and (eq_attr "tune" "xgene1")
+ (eq_attr "type" "neon_add,\
+ neon_add_q,\
+ neon_sub,\
+ neon_sub_q,\
+ neon_neg,\
+ neon_neg_q,\
+ neon_abs,\
+ neon_abs_q,\
+ neon_abd_q,\
+ neon_arith_acc,\
+ neon_arith_acc_q,\
+ neon_reduc_add,\
+ neon_reduc_add_q,\
+ neon_add_halve,\
+ neon_add_halve_q,\
+ neon_sub_halve,\
+ neon_sub_halve_q,\
+ neon_qadd,\
+ neon_qadd_q,\
+ neon_compare,\
+ neon_compare_q,\
+ neon_compare_zero,\
+ neon_compare_zero_q,\
+ neon_tst,\
+ neon_tst_q,\
+ "))
+ "xgene1_decode1op,xgene1_fsu")
+
+;; NOTE(review): neon_arith_acc and neon_arith_acc_q are also listed in
+;; xgene1_neon_arith (latency 3), so the two reservations overlap.  The
+;; name suggests the absolute-difference types (neon_abd*) may have been
+;; intended here -- confirm against the core's latency data.
+(define_insn_reservation "xgene1_neon_abs_diff" 6
+ (and (eq_attr "tune" "xgene1")
+ (eq_attr "type" "neon_arith_acc,neon_arith_acc_q"))
+ "xgene1_decode2op,xgene1_fsu*2")
+
+(define_insn_reservation "xgene1_neon_mul" 5
+ (and (eq_attr "tune" "xgene1")
+ (eq_attr "type" "neon_mul_b,\
+ neon_mul_b_q,\
+ neon_mul_h,\
+ neon_mul_h_q,\
+ neon_mul_s,\
+ neon_mul_s_q,\
+ neon_fp_mul_s_scalar,\
+ neon_fp_mul_s_scalar_q,\
+ neon_fp_mul_d_scalar_q,\
+ neon_mla_b,neon_mla_b_q,\
+ neon_mla_h,neon_mla_h_q,\
+ neon_mla_s,neon_mla_s_q,\
+ neon_mla_h_scalar,\
+ neon_mla_h_scalar_q,\
+ neon_mla_s_scalar,\
+ neon_mla_s_scalar_q,\
+ neon_mla_b_long,\
+ neon_mla_h_long,\
+ neon_mla_s_long,\
+ neon_fp_mul_s,\
+ neon_fp_mul_s_q,\
+ neon_fp_mul_d,\
+ neon_fp_mul_d_q,\
+ neon_fp_mla_s,\
+ neon_fp_mla_s_q,\
+ neon_fp_mla_d,\
+ neon_fp_mla_d_q,\
+ neon_fp_mla_s_scalar,\
+ neon_fp_mla_s_scalar_q,\
+ neon_fp_mla_d_scalar_q,\
+ neon_sat_mul_b,\
+ neon_sat_mul_b_q,\
+ neon_sat_mul_h,\
+ neon_sat_mul_h_q,\
+ neon_sat_mul_s,\
+ neon_sat_mul_s_q,\
+ neon_sat_mul_h_scalar,\
+ neon_sat_mul_h_scalar_q,\
+ neon_sat_mul_s_scalar,\
+ neon_sat_mul_s_scalar_q,\
+ neon_sat_mul_h_scalar_long,\
+ neon_sat_mul_s_scalar_long,\
+ neon_sat_mla_b_long,\
+ neon_sat_mla_h_long,\
+ neon_sat_mla_s_long,\
+ neon_sat_mla_h_scalar_long,\
+ neon_sat_mla_s_scalar_long,\
+ "))
+ "xgene1_decode2op,xgene1_fsu*2")
+
+(define_insn_reservation "xgene1_fp_abd_diff" 5
+ (and (eq_attr "tune" "xgene1")
+ (eq_attr "type" "neon_fp_abd_s,\
+ neon_fp_abd_s_q,\
+ neon_fp_abd_d,\
+ neon_fp_abd_d_q,\
+ "))
+ "xgene1_decode1op,xgene1_fsu")
+
+(define_insn_reservation "xgene1_neon_f_add" 5
+ (and (eq_attr "tune" "xgene1")
+ (eq_attr "type" "neon_fp_addsub_s,\
+ neon_fp_addsub_s_q,\
+ neon_fp_addsub_d,\
+ neon_fp_addsub_d_q,\
+ "))
+ "xgene1_decode1op")
+
+;; NEON floating-point divide.
+;; NOTE(review): latency 2 with the divider held for one cycle is far
+;; below the scalar xgene1_f_divs/xgene1_f_divd entries (22 and 28
+;; cycles) -- confirm this is intended.
+(define_insn_reservation "xgene1_neon_f_div" 2
+ (and (eq_attr "tune" "xgene1")
+ (eq_attr "type" "neon_fp_div_s,\
+ neon_fp_div_s_q,\
+ neon_fp_div_d,\
+ neon_fp_div_d_q,\
+ "))
+ "xgene1_decode1op,(xgene1_fsu+xgene1_fp_divide)")
+
+(define_insn_reservation "xgene1_neon_f_neg" 2
+ (and (eq_attr "tune" "xgene1")
+ (eq_attr "type" "neon_fp_neg_s,\
+ neon_fp_neg_s_q,\
+ neon_fp_neg_d,\
+ neon_fp_neg_d_q,\
+ neon_fp_abs_s,\
+ neon_fp_abs_s_q,\
+ neon_fp_abs_d,\
+ neon_fp_abs_d_q,\
+ "))
+ "xgene1_decode1op")
+
+(define_insn_reservation "xgene1_neon_f_round" 5
+ (and (eq_attr "tune" "xgene1")
+ (eq_attr "type" "neon_fp_round_s,\
+ neon_fp_round_s_q,\
+ neon_fp_round_d,\
+ neon_fp_round_d_q,\
+ "))
+ "xgene1_decode1op")
+
+(define_insn_reservation "xgene1_neon_f_cvt" 5
+ (and (eq_attr "tune" "xgene1")
+ (eq_attr "type" "neon_int_to_fp_s,\
+ neon_int_to_fp_s_q,\
+ neon_int_to_fp_d,\
+ neon_int_to_fp_d_q,\
+ neon_fp_cvt_widen_s,\
+ neon_fp_cvt_narrow_s_q,\
+ neon_fp_cvt_narrow_d_q,\
+ "))
+ "xgene1_decode1op")
+
+(define_insn_reservation "xgene1_neon_f_reduc" 5
+ (and (eq_attr "tune" "xgene1")
+ (eq_attr "type" "neon_fp_reduc_add_s,\
+ neon_fp_reduc_add_s_q,\
+ neon_fp_reduc_add_d,\
+ neon_fp_reduc_add_d_q,\
+ "))
+ "xgene1_decode1op")
+
+(define_insn_reservation "xgene1_neon_cls" 2
+ (and (eq_attr "tune" "xgene1")
+ (eq_attr "type" "neon_cls,neon_cls_q"))
+ "xgene1_decode1op")
+
+(define_insn_reservation "xgene1_neon_st1" 4
+ (and (eq_attr "tune" "xgene1")
+ (eq_attr "type" "neon_store1_one_lane,\
+ neon_store1_one_lane_q,\
+ "))
+ "xgene1_decode1op")
+
+(define_insn_reservation "xgene1_neon_halve_narrow" 6
+ (and (eq_attr "tune" "xgene1")
+ (eq_attr "type" "neon_sub_halve_narrow_q,\
+ neon_add_halve_narrow_q,\
+ "))
+ "xgene1_decodeIsolated")
+
+(define_insn_reservation "xgene1_neon_shift_acc" 6
+ (and (eq_attr "tune" "xgene1")
+ (eq_attr "type" "neon_shift_acc,\
+ neon_shift_acc_q,\
+ "))
+ "xgene1_decode2op")
+
+(define_insn_reservation "xgene1_neon_fp_compare" 3
+ (and (eq_attr "tune" "xgene1")
+ (eq_attr "type" "neon_fp_compare_s,\
+ neon_fp_compare_s_q,\
+ neon_fp_compare_d,\
+ neon_fp_compare_d_q,\
+ "))
+ "xgene1_decode1op")
+
+;; NEON floating-point square root.
+;; NOTE(review): latency 2 with the divider held for one cycle is far
+;; below the scalar fsqrts/fsqrtd entries (22 and 28 cycles) -- confirm
+;; this is intended.
+(define_insn_reservation "xgene1_neon_fp_sqrt" 2
+ (and (eq_attr "tune" "xgene1")
+ (eq_attr "type" "neon_fp_sqrt_s,\
+ neon_fp_sqrt_s_q,\
+ neon_fp_sqrt_d,\
+ neon_fp_sqrt_d_q,\
+ "))
+ "xgene1_decode1op,(xgene1_fsu+xgene1_fp_divide)")
+
+(define_insn_reservation "xgene1_neon_tbl1" 4
+ (and (eq_attr "tune" "xgene1")
+ (eq_attr "type" "neon_tbl1,\
+ neon_tbl1_q,\
+ "))
+ "xgene1_decode2op")
+
+(define_insn_reservation "xgene1_neon_tbl2" 8
+ (and (eq_attr "tune" "xgene1")
+ (eq_attr "type" "neon_tbl2,\
+ neon_tbl2_q,\
+ "))
+ "xgene1_decodeIsolated")
+
+(define_insn_reservation "xgene1_neon_permute" 3
+ (and (eq_attr "tune" "xgene1")
+ (eq_attr "type" "neon_permute,\
+ neon_permute_q,\
+ "))
+ "xgene1_decode2op")
+
+(define_insn_reservation "xgene1_neon_ld1r" 10
+ (and (eq_attr "tune" "xgene1")
+ (eq_attr "type" "neon_load1_all_lanes,\
+ "))
+ "xgene1_decode1op")
+
+(define_insn_reservation "xgene1_neon_fp_recp" 3
+ (and (eq_attr "tune" "xgene1")
+ (eq_attr "type" "neon_fp_recpe_s,\
+ neon_fp_recpe_s_q,\
+ neon_fp_recpe_d,\
+ neon_fp_recpe_d_q,\
+ neon_fp_recpx_s,\
+ neon_fp_recpx_s_q,\
+ neon_fp_recpx_d,\
+ neon_fp_recpx_d_q,\
+ "))
+ "xgene1_decode1op")
+
+
+(define_insn_reservation "xgene1_neon_fp_recp_s" 5
+ (and (eq_attr "tune" "xgene1")
+ (eq_attr "type" "neon_fp_recps_s,\
+ neon_fp_recps_s_q,\
+ neon_fp_recps_d,\
+ neon_fp_recps_d_q,\
+ "))
+ "xgene1_decode1op")
+
+(define_insn_reservation "xgene1_neon_pmull" 5
+ (and (eq_attr "tune" "xgene1")
+ (eq_attr "type" "neon_mul_d_long,\
+ "))
+ "xgene1_decode2op")
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 1dcac767fa5..ccf64a2acea 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -11472,7 +11472,8 @@ architecture.
@opindex mtune
Specify the name of the target processor for which GCC should tune the
performance of the code. Permissible values for this option are:
-@samp{generic}, @samp{cortex-a53}, @samp{cortex-a57}, @samp{thunderx}.
+@samp{generic}, @samp{cortex-a53}, @samp{cortex-a57}, @samp{thunderx},
+@samp{xgene1}.
Additionally, this option can specify that GCC should tune the performance
of the code for a big.LITTLE system. The only permissible value is
@@ -12357,7 +12358,8 @@ Permissible names are: @samp{arm2}, @samp{arm250},
@samp{marvell-pj4},
@samp{xscale}, @samp{iwmmxt}, @samp{iwmmxt2}, @samp{ep9312},
@samp{fa526}, @samp{fa626},
-@samp{fa606te}, @samp{fa626te}, @samp{fmp626}, @samp{fa726te}.
+@samp{fa606te}, @samp{fa626te}, @samp{fmp626}, @samp{fa726te},
+@samp{xgene1}.
Additionally, this option can specify that GCC should tune the performance
of the code for a big.LITTLE system. Permissible names are: