diff options
-rw-r--r-- | gcc/ChangeLog.linaro | 40 | ||||
-rw-r--r-- | gcc/config/aarch64/aarch64-cores.def | 1 | ||||
-rw-r--r-- | gcc/config/aarch64/aarch64-tune.md | 2 | ||||
-rw-r--r-- | gcc/config/aarch64/aarch64.c | 62 | ||||
-rw-r--r-- | gcc/config/aarch64/aarch64.md | 1 | ||||
-rw-r--r-- | gcc/config/arm/aarch-cost-tables.h | 101 | ||||
-rw-r--r-- | gcc/config/arm/arm-cores.def | 1 | ||||
-rw-r--r-- | gcc/config/arm/arm-tables.opt | 3 | ||||
-rw-r--r-- | gcc/config/arm/arm-tune.md | 2 | ||||
-rw-r--r-- | gcc/config/arm/arm.c | 22 | ||||
-rw-r--r-- | gcc/config/arm/arm.md | 11 | ||||
-rw-r--r-- | gcc/config/arm/bpabi.h | 2 | ||||
-rw-r--r-- | gcc/config/arm/t-arm | 1 | ||||
-rw-r--r-- | gcc/config/arm/xgene1.md | 531 | ||||
-rw-r--r-- | gcc/doc/invoke.texi | 6 |
15 files changed, 780 insertions, 6 deletions
diff --git a/gcc/ChangeLog.linaro b/gcc/ChangeLog.linaro index 499068a65e9..a05a96534ff 100644 --- a/gcc/ChangeLog.linaro +++ b/gcc/ChangeLog.linaro @@ -1,3 +1,43 @@ +2015-04-07 Michael Collison <michael.collison@linaro.org> + + Backport from trunk r219679. + 2015-01-15 Richard Earnshaw <rearnsha@arm.com> + + * arm.c (arm_xgene1_tune): Add default initializer for instruction + fusion. + +2015-04-07 Michael Collison <michael.collison@linaro.org> + + Backport from trunk r219661. + 2015-01-15 Philipp Tomsich <philipp.tomsich@theobroma-systems.com> + + * config/arm/arm.md (generic_sched): Specify xgene1 in 'no' list. + Include xgene1.md. + * config/arm/arm.c (arm_issue_rate): Specify 4 for xgene1. + * config/arm/arm-cores.def (xgene1): New entry. + * config/arm/arm-tables.opt: Regenerate. + * config/arm/arm-tune.md: Regenerate. + * config/arm/bpabi.h (BE8_LINK_SPEC): Specify mcpu=xgene1. + +2015-04-07 Michael Collison <michael.collison@linaro.org> + + Backport from trunk r219657. + 2015-01-15 Philipp Tomsich <ptomsich@theobroma-systems.com> + + * config/aarch64/aarch64.md: Include xgene1.md. + * config/arm/xgene1.md: New file. + +2015-04-07 Michael Collison <michael.collison@linaro.org> + + Backport from trunk r219656. + 2015-01-15 Philipp Tomsich <philipp.tomsich@theobroma-systems.com> + + * config/aarch64/aarch64-cores.def (xgene1): Update/add the + xgene1 (APM XGene-1) core definition. + * config/aarch64/aarch64.c: Add cost tables for APM XGene-1. + * config/arm/aarch-cost-tables.h: Add cost tables for APM XGene-1. + * doc/invoke.texi: Document -mcpu=xgene1. + 2015-04-07 Yvan Roux <yvan.roux@linaro.org> Backport from trunk r217062, r217646, r218658. 
diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def index 110b41f0190..2b694e67029 100644 --- a/gcc/config/aarch64/aarch64-cores.def +++ b/gcc/config/aarch64/aarch64-cores.def @@ -37,6 +37,7 @@ AARCH64_CORE("cortex-a53", cortexa53, cortexa53, 8, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa53) AARCH64_CORE("cortex-a57", cortexa15, cortexa15, 8, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57) AARCH64_CORE("thunderx", thunderx, thunderx, 8, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx) +AARCH64_CORE("xgene1", xgene1, xgene1, 8, AARCH64_FL_FOR_ARCH8, xgene1) /* V8 big.LITTLE implementations. */ diff --git a/gcc/config/aarch64/aarch64-tune.md b/gcc/config/aarch64/aarch64-tune.md index c717ea848bc..640908234ae 100644 --- a/gcc/config/aarch64/aarch64-tune.md +++ b/gcc/config/aarch64/aarch64-tune.md @@ -1,5 +1,5 @@ ;; -*- buffer-read-only: t -*- ;; Generated automatically by gentune.sh from aarch64-cores.def (define_attr "tune" - "cortexa53,cortexa15,thunderx,cortexa57cortexa53" + "cortexa53,cortexa15,thunderx,xgene1,cortexa57cortexa53" (const (symbol_ref "((enum attr_tune) aarch64_tune)"))) diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index ae2f1aabfc2..ce0ac180a8e 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -215,6 +215,27 @@ static const struct cpu_addrcost_table cortexa57_addrcost_table = #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007 __extension__ #endif +static const struct cpu_addrcost_table xgene1_addrcost_table = +{ +#if HAVE_DESIGNATED_INITIALIZERS + .addr_scale_costs = +#endif + { + NAMED_PARAM (hi, 1), + NAMED_PARAM (si, 0), + NAMED_PARAM (di, 0), + NAMED_PARAM (ti, 1), + }, + NAMED_PARAM (pre_modify, 1), + NAMED_PARAM (post_modify, 0), + NAMED_PARAM (register_offset, 0), + NAMED_PARAM (register_extend, 1), + NAMED_PARAM (imm_offset, 0), +}; + +#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007 +__extension__ +#endif 
static const struct cpu_regmove_cost generic_regmove_cost = { NAMED_PARAM (GP2GP, 1), @@ -253,6 +274,16 @@ static const struct cpu_regmove_cost thunderx_regmove_cost = NAMED_PARAM (FP2FP, 4) }; +static const struct cpu_regmove_cost xgene1_regmove_cost = +{ + NAMED_PARAM (GP2GP, 1), + /* Avoid the use of slow int<->fp moves for spilling by setting + their cost higher than memmov_cost. */ + NAMED_PARAM (GP2FP, 8), + NAMED_PARAM (FP2GP, 8), + NAMED_PARAM (FP2FP, 2) +}; + /* Generic costs for vector insn classes. */ #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007 __extension__ @@ -293,6 +324,26 @@ static const struct cpu_vector_cost cortexa57_vector_cost = NAMED_PARAM (cond_not_taken_branch_cost, 1) }; +/* Generic costs for vector insn classes. */ +#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007 +__extension__ +#endif +static const struct cpu_vector_cost xgene1_vector_cost = +{ + NAMED_PARAM (scalar_stmt_cost, 1), + NAMED_PARAM (scalar_load_cost, 5), + NAMED_PARAM (scalar_store_cost, 1), + NAMED_PARAM (vec_stmt_cost, 2), + NAMED_PARAM (vec_to_scalar_cost, 4), + NAMED_PARAM (scalar_to_vec_cost, 4), + NAMED_PARAM (vec_align_load_cost, 10), + NAMED_PARAM (vec_unalign_load_cost, 10), + NAMED_PARAM (vec_unalign_store_cost, 2), + NAMED_PARAM (vec_store_cost, 2), + NAMED_PARAM (cond_taken_branch_cost, 2), + NAMED_PARAM (cond_not_taken_branch_cost, 1) +}; + #define AARCH64_FUSE_NOTHING (0) #define AARCH64_FUSE_MOV_MOVK (1 << 0) #define AARCH64_FUSE_ADRP_ADD (1 << 1) @@ -360,6 +411,17 @@ static const struct tune_params thunderx_tunings = 1 /* vec_reassoc_width. */ }; +static const struct tune_params xgene1_tunings = +{ + &xgene1_extra_costs, + &xgene1_addrcost_table, + &xgene1_regmove_cost, + &xgene1_vector_cost, + NAMED_PARAM (memmov_cost, 6), + NAMED_PARAM (issue_rate, 4), + NAMED_PARAM (fuseable_ops, AARCH64_FUSE_NOTHING) +}; + /* A processor implementing AArch64. 
*/ struct processor { diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index 9788016f02d..0ab9c38e425 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -176,6 +176,7 @@ (include "../arm/cortex-a53.md") (include "../arm/cortex-a15.md") (include "thunderx.md") +(include "../arm/xgene1.md") ;; ------------------------------------------------------------------- ;; Jumps and other miscellaneous insns diff --git a/gcc/config/arm/aarch-cost-tables.h b/gcc/config/arm/aarch-cost-tables.h index adf8708fccd..7f740063300 100644 --- a/gcc/config/arm/aarch-cost-tables.h +++ b/gcc/config/arm/aarch-cost-tables.h @@ -325,4 +325,105 @@ const struct cpu_cost_table cortexa57_extra_costs = } }; +const struct cpu_cost_table xgene1_extra_costs = +{ + /* ALU */ + { + 0, /* arith. */ + 0, /* logical. */ + 0, /* shift. */ + COSTS_N_INSNS (1), /* shift_reg. */ + COSTS_N_INSNS (1), /* arith_shift. */ + COSTS_N_INSNS (1), /* arith_shift_reg. */ + COSTS_N_INSNS (1), /* log_shift. */ + COSTS_N_INSNS (1), /* log_shift_reg. */ + COSTS_N_INSNS (1), /* extend. */ + 0, /* extend_arithm. */ + COSTS_N_INSNS (1), /* bfi. */ + COSTS_N_INSNS (1), /* bfx. */ + 0, /* clz. */ + COSTS_N_INSNS (1), /* rev. */ + 0, /* non_exec. */ + true /* non_exec_costs_exec. */ + }, + { + /* MULT SImode */ + { + COSTS_N_INSNS (4), /* simple. */ + COSTS_N_INSNS (4), /* flag_setting. */ + COSTS_N_INSNS (4), /* extend. */ + COSTS_N_INSNS (4), /* add. */ + COSTS_N_INSNS (4), /* extend_add. */ + COSTS_N_INSNS (20) /* idiv. */ + }, + /* MULT DImode */ + { + COSTS_N_INSNS (5), /* simple. */ + 0, /* flag_setting (N/A). */ + COSTS_N_INSNS (5), /* extend. */ + COSTS_N_INSNS (5), /* add. */ + COSTS_N_INSNS (5), /* extend_add. */ + COSTS_N_INSNS (21) /* idiv. */ + } + }, + /* LD/ST */ + { + COSTS_N_INSNS (5), /* load. */ + COSTS_N_INSNS (6), /* load_sign_extend. */ + COSTS_N_INSNS (5), /* ldrd. */ + COSTS_N_INSNS (5), /* ldm_1st. */ + 1, /* ldm_regs_per_insn_1st. 
*/ + 1, /* ldm_regs_per_insn_subsequent. */ + COSTS_N_INSNS (10), /* loadf. */ + COSTS_N_INSNS (10), /* loadd. */ + COSTS_N_INSNS (5), /* load_unaligned. */ + 0, /* store. */ + 0, /* strd. */ + 0, /* stm_1st. */ + 1, /* stm_regs_per_insn_1st. */ + 1, /* stm_regs_per_insn_subsequent. */ + 0, /* storef. */ + 0, /* stored. */ + 0, /* store_unaligned. */ + }, + { + /* FP SFmode */ + { + COSTS_N_INSNS (23), /* div. */ + COSTS_N_INSNS (5), /* mult. */ + COSTS_N_INSNS (5), /* mult_addsub. */ + COSTS_N_INSNS (5), /* fma. */ + COSTS_N_INSNS (5), /* addsub. */ + COSTS_N_INSNS (2), /* fpconst. */ + COSTS_N_INSNS (3), /* neg. */ + COSTS_N_INSNS (2), /* compare. */ + COSTS_N_INSNS (6), /* widen. */ + COSTS_N_INSNS (6), /* narrow. */ + COSTS_N_INSNS (4), /* toint. */ + COSTS_N_INSNS (4), /* fromint. */ + COSTS_N_INSNS (4) /* roundint. */ + }, + /* FP DFmode */ + { + COSTS_N_INSNS (29), /* div. */ + COSTS_N_INSNS (5), /* mult. */ + COSTS_N_INSNS (5), /* mult_addsub. */ + COSTS_N_INSNS (5), /* fma. */ + COSTS_N_INSNS (5), /* addsub. */ + COSTS_N_INSNS (3), /* fpconst. */ + COSTS_N_INSNS (3), /* neg. */ + COSTS_N_INSNS (2), /* compare. */ + COSTS_N_INSNS (6), /* widen. */ + COSTS_N_INSNS (6), /* narrow. */ + COSTS_N_INSNS (4), /* toint. */ + COSTS_N_INSNS (4), /* fromint. */ + COSTS_N_INSNS (4) /* roundint. */ + } + }, + /* Vector */ + { + COSTS_N_INSNS (2) /* alu. 
*/ + } +}; + #endif /* GCC_AARCH_COST_TABLES_H */ diff --git a/gcc/config/arm/arm-cores.def b/gcc/config/arm/arm-cores.def index d5067b0e9b9..910431ab839 100644 --- a/gcc/config/arm/arm-cores.def +++ b/gcc/config/arm/arm-cores.def @@ -165,6 +165,7 @@ ARM_CORE("cortex-a15.cortex-a7", cortexa15cortexa7, cortexa7, 7A, FL_LDSCHED | /* V8 Architecture Processors */ ARM_CORE("cortex-a53", cortexa53, cortexa53, 8A, FL_LDSCHED | FL_CRC32, cortex_a53) ARM_CORE("cortex-a57", cortexa57, cortexa15, 8A, FL_LDSCHED | FL_CRC32, cortex_a57) +ARM_CORE("xgene1", xgene1, xgene1, 8A, FL_LDSCHED, xgene1) /* V8 big.LITTLE implementations */ ARM_CORE("cortex-a57.cortex-a53", cortexa57cortexa53, cortexa53, 8A, FL_LDSCHED | FL_CRC32, cortex_a57) diff --git a/gcc/config/arm/arm-tables.opt b/gcc/config/arm/arm-tables.opt index 9c7e944bfcb..7efd2b60941 100644 --- a/gcc/config/arm/arm-tables.opt +++ b/gcc/config/arm/arm-tables.opt @@ -304,6 +304,9 @@ EnumValue Enum(processor_type) String(cortex-a57) Value(cortexa57) EnumValue +Enum(processor_type) String(xgene1) Value(xgene1) + +EnumValue Enum(processor_type) String(cortex-a57.cortex-a53) Value(cortexa57cortexa53) Enum diff --git a/gcc/config/arm/arm-tune.md b/gcc/config/arm/arm-tune.md index 84355d69e46..97cca44b951 100644 --- a/gcc/config/arm/arm-tune.md +++ b/gcc/config/arm/arm-tune.md @@ -32,5 +32,5 @@ cortexr5,cortexr7,cortexm7, cortexm4,cortexm3,marvell_pj4, cortexa15cortexa7,cortexa53,cortexa57, - cortexa57cortexa53" + xgene1,cortexa57cortexa53" (const (symbol_ref "((enum attr_tune) arm_tune)"))) diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index 1c741ed63c2..b02f56d6c7e 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -1944,6 +1944,25 @@ const struct tune_params arm_cortex_a57_tune = ARM_FUSE_MOVW_MOVT /* Fuseable pairs of instructions. */ }; +const struct tune_params arm_xgene1_tune = +{ + arm_9e_rtx_costs, + &xgene1_extra_costs, + NULL, /* Scheduler cost adjustment. */ + 1, /* Constant limit. 
*/ + 2, /* Max cond insns. */ + ARM_PREFETCH_NOT_BENEFICIAL, + false, /* Prefer constant pool. */ + arm_default_branch_cost, + true, /* Prefer LDRD/STRD. */ + {true, true}, /* Prefer non short circuit. */ + &arm_default_vec_cost, /* Vectorizer costs. */ + false, /* Prefer Neon for 64-bits bitops. */ + true, true, /* Prefer 32-bit encodings. */ + ARM_SCHED_AUTOPREF_OFF, /* Sched L2 autopref. */ + ARM_FUSE_NOTHING /* Fuseable pairs of instructions. */ +}; + /* Branches can be dual-issued on Cortex-A5, so conditional execution is less appealing. Set max_insns_skipped to a low value. */ @@ -29929,6 +29948,9 @@ arm_issue_rate (void) { switch (arm_tune) { + case xgene1: + return 4; + case cortexa15: case cortexa57: return 3; diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md index d931ed45d76..829421cb07c 100644 --- a/gcc/config/arm/arm.md +++ b/gcc/config/arm/arm.md @@ -109,6 +109,11 @@ ;; given instruction does not shift one of its input operands. (define_attr "shift" "" (const_int 0)) +;; [For compatibility with AArch64 in pipeline models] +;; Attribute that specifies whether or not the instruction touches fp +;; registers. +(define_attr "fp" "no,yes" (const_string "no")) + ; Floating Point Unit. If we only have floating point emulation, then there ; is no point in scheduling the floating point insns. (Well, for best ; performance we should try and group them together). 
@@ -373,7 +378,8 @@ arm926ejs,arm1020e,arm1026ejs,arm1136js,\ arm1136jfs,cortexa5,cortexa7,cortexa8,\ cortexa9,cortexa12,cortexa15,cortexa53,\ - cortexm4,cortexm7,marvell_pj4") + cortexm4,cortexm7,marvell_pj4, + xgene1") (eq_attr "tune_cortexr4" "yes")) (const_string "no") (const_string "yes")))) @@ -383,7 +389,7 @@ (and (eq_attr "fpu" "vfp") (eq_attr "tune" "!arm1020e,arm1022e,cortexa5,cortexa7,\ cortexa8,cortexa9,cortexa53,cortexm4,\ - cortexm7,marvell_pj4") + cortexm7,marvell_pj4,xgene1") (eq_attr "tune_cortexr4" "no")) (const_string "yes") (const_string "no")))) @@ -412,6 +418,7 @@ (include "cortex-m4-fpu.md") (include "vfp11.md") (include "marvell-pj4.md") +(include "xgene1.md") ;;--------------------------------------------------------------------------- diff --git a/gcc/config/arm/bpabi.h b/gcc/config/arm/bpabi.h index f99e1af1e1a..574500eb387 100644 --- a/gcc/config/arm/bpabi.h +++ b/gcc/config/arm/bpabi.h @@ -70,6 +70,7 @@ |mcpu=cortex-a53 \ |mcpu=cortex-a57 \ |mcpu=cortex-a57.cortex-a53 \ + |mcpu=xgene1 \ |mcpu=cortex-m1.small-multiply \ |mcpu=cortex-m0.small-multiply \ |mcpu=cortex-m0plus.small-multiply \ @@ -90,6 +91,7 @@ |mcpu=cortex-a53 \ |mcpu=cortex-a57 \ |mcpu=cortex-a57.cortex-a53 \ + |mcpu=xgene1 \ |mcpu=cortex-m1.small-multiply \ |mcpu=cortex-m0.small-multiply \ |mcpu=cortex-m0plus.small-multiply \ diff --git a/gcc/config/arm/t-arm b/gcc/config/arm/t-arm index 2ad7bf3ec17..52d0c82a6cd 100644 --- a/gcc/config/arm/t-arm +++ b/gcc/config/arm/t-arm @@ -40,6 +40,7 @@ MD_INCLUDES= $(srcdir)/config/arm/arm1020e.md \ $(srcdir)/config/arm/cortex-a9.md \ $(srcdir)/config/arm/cortex-a9-neon.md \ $(srcdir)/config/arm/cortex-a53.md \ + $(srcdir)/config/arm/xgene1.md \ $(srcdir)/config/arm/cortex-m4-fpu.md \ $(srcdir)/config/arm/cortex-m4.md \ $(srcdir)/config/arm/cortex-r4f.md \ diff --git a/gcc/config/arm/xgene1.md b/gcc/config/arm/xgene1.md new file mode 100644 index 00000000000..1fba538783b --- /dev/null +++ b/gcc/config/arm/xgene1.md @@ -0,0 +1,531 @@ 
+;; Machine description for AppliedMicro xgene1 core. +;; Copyright (C) 2012-2015 Free Software Foundation, Inc. +;; Contributed by Theobroma Systems Design und Consulting GmbH. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, but +;; WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;; General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. + +;; Pipeline description for the xgene1 micro-architecture + +(define_automaton "xgene1") + +(define_cpu_unit "xgene1_decode_out0" "xgene1") +(define_cpu_unit "xgene1_decode_out1" "xgene1") +(define_cpu_unit "xgene1_decode_out2" "xgene1") +(define_cpu_unit "xgene1_decode_out3" "xgene1") + +(define_cpu_unit "xgene1_divide" "xgene1") +(define_cpu_unit "xgene1_fp_divide" "xgene1") +(define_cpu_unit "xgene1_fsu" "xgene1") +(define_cpu_unit "xgene1_fcmp" "xgene1") + +(define_reservation "xgene1_decode1op" + "( xgene1_decode_out0 ) + |( xgene1_decode_out1 ) + |( xgene1_decode_out2 ) + |( xgene1_decode_out3 )" +) +(define_reservation "xgene1_decode2op" + "( xgene1_decode_out0 + xgene1_decode_out1 ) + |( xgene1_decode_out0 + xgene1_decode_out2 ) + |( xgene1_decode_out0 + xgene1_decode_out3 ) + |( xgene1_decode_out1 + xgene1_decode_out2 ) + |( xgene1_decode_out1 + xgene1_decode_out3 ) + |( xgene1_decode_out2 + xgene1_decode_out3 )" +) +(define_reservation "xgene1_decodeIsolated" + "( xgene1_decode_out0 + xgene1_decode_out1 + xgene1_decode_out2 + xgene1_decode_out3 )" +) + +(define_insn_reservation "xgene1_branch" 1 + (and (eq_attr 
"tune" "xgene1") + (eq_attr "type" "branch")) + "xgene1_decode1op") + +(define_insn_reservation "xgene1_nop" 1 + (and (eq_attr "tune" "xgene1") + (eq_attr "type" "no_insn")) + "xgene1_decode1op") + +(define_insn_reservation "xgene1_call" 1 + (and (eq_attr "tune" "xgene1") + (eq_attr "type" "call")) + "xgene1_decode2op") + +(define_insn_reservation "xgene1_f_load" 10 + (and (eq_attr "tune" "xgene1") + (eq_attr "type" "f_loadd,f_loads")) + "xgene1_decode2op") + +(define_insn_reservation "xgene1_f_store" 4 + (and (eq_attr "tune" "xgene1") + (eq_attr "type" "f_stored,f_stores")) + "xgene1_decode2op") + +(define_insn_reservation "xgene1_fmov" 2 + (and (eq_attr "tune" "xgene1") + (eq_attr "type" "fmov,fconsts,fconstd")) + "xgene1_decode1op") + +(define_insn_reservation "xgene1_f_mcr" 10 + (and (eq_attr "tune" "xgene1") + (eq_attr "type" "f_mcr")) + "xgene1_decodeIsolated") + +(define_insn_reservation "xgene1_f_mrc" 4 + (and (eq_attr "tune" "xgene1") + (eq_attr "type" "f_mrc")) + "xgene1_decode2op") + +(define_insn_reservation "xgene1_load_pair" 6 + (and (eq_attr "tune" "xgene1") + (eq_attr "type" "load2")) + "xgene1_decodeIsolated") + +(define_insn_reservation "xgene1_store_pair" 2 + (and (eq_attr "tune" "xgene1") + (eq_attr "type" "store2")) + "xgene1_decodeIsolated") + +(define_insn_reservation "xgene1_fp_load1" 10 + (and (eq_attr "tune" "xgene1") + (eq_attr "type" "load1") + (eq_attr "fp" "yes")) + "xgene1_decode1op") + +(define_insn_reservation "xgene1_load1" 5 + (and (eq_attr "tune" "xgene1") + (eq_attr "type" "load1")) + "xgene1_decode1op") + +(define_insn_reservation "xgene1_store1" 2 + (and (eq_attr "tune" "xgene1") + (eq_attr "type" "store1")) + "xgene1_decode2op") + +(define_insn_reservation "xgene1_move" 1 + (and (eq_attr "tune" "xgene1") + (eq_attr "type" "mov_reg,mov_imm,mrs")) + "xgene1_decode1op") + +(define_insn_reservation "xgene1_alu" 1 + (and (eq_attr "tune" "xgene1") + (eq_attr "type" "alu_imm,alu_reg,alu_shift_imm,\ + alu_ext,adc_reg,csel,logic_imm,\ 
+ logic_reg,logic_shift_imm,clz,\ + rbit,shift_reg,adr,mov_reg,\ + mov_imm,extend")) + "xgene1_decode1op") + +(define_insn_reservation "xgene1_simd" 1 + (and (eq_attr "tune" "xgene1") + (eq_attr "type" "rev")) + "xgene1_decode1op") + +(define_insn_reservation "xgene1_alus" 1 + (and (eq_attr "tune" "xgene1") + (eq_attr "type" "alus_imm,alu_reg,alus_shift_imm,\ + alus_ext,logics_imm,logics_reg,\ + logics_shift_imm")) + "xgene1_decode1op") + +(define_insn_reservation "xgene1_mul" 6 + (and (eq_attr "tune" "xgene1") + (eq_attr "type" "mul,mla,smull,umull,smlal,umlal")) + "xgene1_decode2op") + +(define_insn_reservation "xgene1_div" 34 + (and (eq_attr "tune" "xgene1") + (eq_attr "type" "sdiv,udiv")) + "xgene1_decode1op,xgene1_divide*7") + +(define_insn_reservation "xgene1_fcmp" 10 + (and (eq_attr "tune" "xgene1") + (eq_attr "type" "fcmpd,fcmps")) + "xgene1_decode1op,xgene1_fsu+xgene1_fcmp*3") + +(define_insn_reservation "xgene1_fcsel" 3 + (and (eq_attr "tune" "xgene1") + (eq_attr "type" "fcsel")) + "xgene1_decode1op,xgene1_fsu") + +(define_insn_reservation "xgene1_bfm" 2 + (and (eq_attr "tune" "xgene1") + (eq_attr "type" "bfm")) + "xgene1_decode1op,xgene1_fsu") + +(define_insn_reservation "xgene1_f_rint" 5 + (and (eq_attr "tune" "xgene1") + (eq_attr "type" "f_rintd,f_rints")) + "xgene1_decode1op,xgene1_fsu") + +(define_insn_reservation "xgene1_f_cvt" 3 + (and (eq_attr "tune" "xgene1") + (eq_attr "type" "f_cvt")) + "xgene1_decode1op,xgene1_fsu") + +(define_insn_reservation "xgene1_f_cvtf2i" 11 + (and (eq_attr "tune" "xgene1") + (eq_attr "type" "f_cvtf2i")) + "xgene1_decodeIsolated,xgene1_fsu") + +(define_insn_reservation "xgene1_f_cvti2f" 14 + (and (eq_attr "tune" "xgene1") + (eq_attr "type" "f_cvti2f")) + "xgene1_decodeIsolated,xgene1_fsu") + +(define_insn_reservation "xgene1_f_add" 5 + (and (eq_attr "tune" "xgene1") + (eq_attr "type" "faddd,fadds,fmuld,fmuls")) + "xgene1_decode1op,xgene1_fsu") + +(define_insn_reservation "xgene1_f_divs" 22 + (and (eq_attr "tune" 
"xgene1") + (eq_attr "type" "fdivs,fsqrts")) + "xgene1_decode1op,(xgene1_fp_divide+xgene1_fsu)*8,xgene1_fp_divide*14") + +(define_insn_reservation "xgene1_f_divd" 28 + (and (eq_attr "tune" "xgene1") + (eq_attr "type" "fdivd")) + "xgene1_decode1op,(xgene1_fp_divide+xgene1_fsu)*11,xgene1_fp_divide*17") + +(define_insn_reservation "xgene1_f_sqrtd" 28 + (and (eq_attr "tune" "xgene1") + (eq_attr "type" "fsqrtd")) + "xgene1_decode1op,(xgene1_fp_divide+xgene1_fsu)*17,xgene1_fp_divide*11") + +(define_insn_reservation "xgene1_f_arith" 2 + (and (eq_attr "tune" "xgene1") + (eq_attr "type" "ffarithd,ffariths")) + "xgene1_decode1op,xgene1_fsu") + +(define_insn_reservation "xgene1_f_select" 3 + (and (eq_attr "tune" "xgene1") + (eq_attr "type" "f_minmaxd,f_minmaxs")) + "xgene1_decode1op,xgene1_fsu") + +(define_insn_reservation "xgene1_neon_dup" 3 + (and (eq_attr "tune" "xgene1") + (eq_attr "type" "neon_dup,neon_dup_q")) + "xgene1_decode1op,xgene1_fsu") + +(define_insn_reservation "xgene1_neon_load1" 11 + (and (eq_attr "tune" "xgene1") + (eq_attr "type" "neon_load1_1reg, neon_load1_1reg_q")) + "xgene1_decode2op,xgene1_fsu") + +(define_insn_reservation "xgene1_neon_store1" 5 + (and (eq_attr "tune" "xgene1") + (eq_attr "type" "neon_store1_1reg, neon_store1_1reg_q")) + "xgene1_decode2op,xgene1_fsu") + +(define_insn_reservation "xgene1_neon_logic" 2 + (and (eq_attr "tune" "xgene1") + (eq_attr "type" "neon_logic,\ + neon_logic_q,\ + neon_bsl,\ + neon_bsl_q,\ + neon_move,\ + neon_move_q,\ + ")) + "xgene1_decode1op,xgene1_fsu") + +(define_insn_reservation "xgene1_neon_umov" 7 + (and (eq_attr "tune" "xgene1") + (eq_attr "type" "neon_to_gp, neon_to_gp_q")) + "xgene1_decodeIsolated") + +(define_insn_reservation "xgene1_neon_ins" 14 + (and (eq_attr "tune" "xgene1") + (eq_attr "type" "neon_from_gp,\ + neon_from_gp_q,\ + neon_ins,\ + neon_ins_q,\ + ")) + "xgene1_decodeIsolated,xgene1_fsu") + +(define_insn_reservation "xgene1_neon_shift" 3 + (and (eq_attr "tune" "xgene1") + (eq_attr "type" 
"neon_shift_imm,\ + neon_shift_imm_q,\ + neon_shift_reg,\ + neon_shift_reg_q,\ + neon_shift_imm_long,\ + neon_sat_shift_imm,\ + neon_sat_shift_imm_q,\ + neon_sat_shift_imm_narrow_q,\ + neon_sat_shift_reg,\ + neon_sat_shift_reg_q,\ + neon_shift_imm_narrow_q,\ + ")) + "xgene1_decode1op,xgene1_fsu") + +(define_insn_reservation "xgene1_neon_arith" 3 + (and (eq_attr "tune" "xgene1") + (eq_attr "type" "neon_add,\ + neon_add_q,\ + neon_sub,\ + neon_sub_q,\ + neon_neg,\ + neon_neg_q,\ + neon_abs,\ + neon_abs_q,\ + neon_abd_q,\ + neon_arith_acc,\ + neon_arith_acc_q,\ + neon_reduc_add,\ + neon_reduc_add_q,\ + neon_add_halve,\ + neon_add_halve_q,\ + neon_sub_halve,\ + neon_sub_halve_q,\ + neon_qadd,\ + neon_qadd_q,\ + neon_compare,\ + neon_compare_q,\ + neon_compare_zero,\ + neon_compare_zero_q,\ + neon_tst,\ + neon_tst_q,\ + ")) + "xgene1_decode1op,xgene1_fsu") + +(define_insn_reservation "xgene1_neon_abs_diff" 6 + (and (eq_attr "tune" "xgene1") + (eq_attr "type" "neon_arith_acc,neon_arith_acc_q")) + "xgene1_decode2op,xgene1_fsu*2") + +(define_insn_reservation "xgene1_neon_mul" 5 + (and (eq_attr "tune" "xgene1") + (eq_attr "type" "neon_mul_b,\ + neon_mul_b_q,\ + neon_mul_h,\ + neon_mul_h_q,\ + neon_mul_s,\ + neon_mul_s_q,\ + neon_fp_mul_s_scalar,\ + neon_fp_mul_s_scalar_q,\ + neon_fp_mul_d_scalar_q,\ + neon_mla_b,neon_mla_b_q,\ + neon_mla_h,neon_mla_h_q,\ + neon_mla_s,neon_mla_s_q,\ + neon_mla_h_scalar,\ + neon_mla_h_scalar_q,\ + neon_mla_s_scalar,\ + neon_mla_s_scalar_q,\ + neon_mla_b_long,\ + neon_mla_h_long,\ + neon_mla_s_long,\ + neon_fp_mul_s,\ + neon_fp_mul_s_q,\ + neon_fp_mul_d,\ + neon_fp_mul_d_q,\ + neon_fp_mla_s,\ + neon_fp_mla_s_q,\ + neon_fp_mla_d,\ + neon_fp_mla_d_q,\ + neon_fp_mla_s_scalar,\ + neon_fp_mla_s_scalar_q,\ + neon_fp_mla_d_scalar_q,\ + neon_sat_mul_b,\ + neon_sat_mul_b_q,\ + neon_sat_mul_h,\ + neon_sat_mul_h_q,\ + neon_sat_mul_s,\ + neon_sat_mul_s_q,\ + neon_sat_mul_h_scalar,\ + neon_sat_mul_h_scalar_q,\ + neon_sat_mul_s_scalar,\ + 
neon_sat_mul_s_scalar_q,\ + neon_sat_mul_h_scalar_long,\ + neon_sat_mul_s_scalar_long,\ + neon_sat_mla_b_long,\ + neon_sat_mla_h_long,\ + neon_sat_mla_s_long,\ + neon_sat_mla_h_scalar_long,\ + neon_sat_mla_s_scalar_long,\ + ")) + "xgene1_decode2op,xgene1_fsu*2") + +(define_insn_reservation "xgene1_fp_abd_diff" 5 + (and (eq_attr "tune" "xgene1") + (eq_attr "type" "neon_fp_abd_s,\ + neon_fp_abd_s_q,\ + neon_fp_abd_d,\ + neon_fp_abd_d_q,\ + ")) + "xgene1_decode1op,xgene1_fsu") + +(define_insn_reservation "xgene1_neon_f_add" 5 + (and (eq_attr "tune" "xgene1") + (eq_attr "type" "neon_fp_addsub_s,\ + neon_fp_addsub_s_q,\ + neon_fp_addsub_d,\ + neon_fp_addsub_d_q,\ + ")) + "xgene1_decode1op") + +(define_insn_reservation "xgene1_neon_f_div" 2 + (and (eq_attr "tune" "xgene1") + (eq_attr "type" "neon_fp_div_s,\ + neon_fp_div_s_q,\ + neon_fp_div_d,\ + neon_fp_div_d_q,\ + ")) + "xgene1_decode1op,(xgene1_fsu+xgene1_fp_divide)") + +(define_insn_reservation "xgene1_neon_f_neg" 2 + (and (eq_attr "tune" "xgene1") + (eq_attr "type" "neon_fp_neg_s,\ + neon_fp_neg_s_q,\ + neon_fp_neg_d,\ + neon_fp_neg_d_q,\ + neon_fp_abs_s,\ + neon_fp_abs_s_q,\ + neon_fp_abs_d,\ + neon_fp_abs_d_q,\ + ")) + "xgene1_decode1op") + +(define_insn_reservation "xgene1_neon_f_round" 5 + (and (eq_attr "tune" "xgene1") + (eq_attr "type" "neon_fp_round_s,\ + neon_fp_round_s_q,\ + neon_fp_round_d,\ + neon_fp_round_d_q,\ + ")) + "xgene1_decode1op") + +(define_insn_reservation "xgene1_neon_f_cvt" 5 + (and (eq_attr "tune" "xgene1") + (eq_attr "type" "neon_int_to_fp_s,\ + neon_int_to_fp_s_q,\ + neon_int_to_fp_d,\ + neon_int_to_fp_d_q,\ + neon_fp_cvt_widen_s,\ + neon_fp_cvt_narrow_s_q,\ + neon_fp_cvt_narrow_d_q,\ + ")) + "xgene1_decode1op") + +(define_insn_reservation "xgene1_neon_f_reduc" 5 + (and (eq_attr "tune" "xgene1") + (eq_attr "type" "neon_fp_reduc_add_s,\ + neon_fp_reduc_add_s_q,\ + neon_fp_reduc_add_d,\ + neon_fp_reduc_add_d_q,\ + ")) + "xgene1_decode1op") + +(define_insn_reservation "xgene1_neon_cls" 2 + 
(and (eq_attr "tune" "xgene1") + (eq_attr "type" "neon_cls,neon_cls_q")) + "xgene1_decode1op") + +(define_insn_reservation "xgene1_neon_st1" 4 + (and (eq_attr "tune" "xgene1") + (eq_attr "type" "neon_store1_one_lane,\ + neon_store1_one_lane_q,\ + ")) + "xgene1_decode1op") + +(define_insn_reservation "xgene1_neon_halve_narrow" 6 + (and (eq_attr "tune" "xgene1") + (eq_attr "type" "neon_sub_halve_narrow_q,\ + neon_add_halve_narrow_q,\ + ")) + "xgene1_decodeIsolated") + +(define_insn_reservation "xgene1_neon_shift_acc" 6 + (and (eq_attr "tune" "xgene1") + (eq_attr "type" "neon_shift_acc,\ + neon_shift_acc_q,\ + ")) + "xgene1_decode2op") + +(define_insn_reservation "xgene1_neon_fp_compare" 3 + (and (eq_attr "tune" "xgene1") + (eq_attr "type" "neon_fp_compare_s,\ + neon_fp_compare_s_q,\ + neon_fp_compare_d,\ + neon_fp_compare_d_q,\ + ")) + "xgene1_decode1op") + +(define_insn_reservation "xgene1_neon_fp_sqrt" 2 + (and (eq_attr "tune" "xgene1") + (eq_attr "type" "neon_fp_sqrt_s,\ + neon_fp_sqrt_s_q,\ + neon_fp_sqrt_d,\ + neon_fp_sqrt_d_q,\ + ")) + "xgene1_decode1op,(xgene1_fsu+xgene1_fp_divide)") + +(define_insn_reservation "xgene1_neon_tbl1" 4 + (and (eq_attr "tune" "xgene1") + (eq_attr "type" "neon_tbl1,\ + neon_tbl1_q,\ + ")) + "xgene1_decode2op") + +(define_insn_reservation "xgene1_neon_tbl2" 8 + (and (eq_attr "tune" "xgene1") + (eq_attr "type" "neon_tbl2,\ + neon_tbl2_q,\ + ")) + "xgene1_decodeIsolated") + +(define_insn_reservation "xgene1_neon_permute" 3 + (and (eq_attr "tune" "xgene1") + (eq_attr "type" "neon_permute,\ + neon_permute_q,\ + ")) + "xgene1_decode2op") + +(define_insn_reservation "xgene1_neon_ld1r" 10 + (and (eq_attr "tune" "xgene1") + (eq_attr "type" "neon_load1_all_lanes,\ + ")) + "xgene1_decode1op") + +(define_insn_reservation "xgene1_neon_fp_recp" 3 + (and (eq_attr "tune" "xgene1") + (eq_attr "type" "neon_fp_recpe_s,\ + neon_fp_recpe_s_q,\ + neon_fp_recpe_d,\ + neon_fp_recpe_d_q,\ + neon_fp_recpx_s,\ + neon_fp_recpx_s_q,\ + neon_fp_recpx_d,\ + 
neon_fp_recpx_d_q,\ + ")) + "xgene1_decode1op") + + +(define_insn_reservation "xgene1_neon_fp_recp_s" 5 + (and (eq_attr "tune" "xgene1") + (eq_attr "type" "neon_fp_recps_s,\ + neon_fp_recps_s_q,\ + neon_fp_recps_d,\ + neon_fp_recps_d_q,\ + ")) + "xgene1_decode1op") + +(define_insn_reservation "xgene1_neon_pmull" 5 + (and (eq_attr "tune" "xgene1") + (eq_attr "type" "neon_mul_d_long,\ + ")) + "xgene1_decode2op") diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 1dcac767fa5..ccf64a2acea 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -11472,7 +11472,8 @@ architecture. @opindex mtune Specify the name of the target processor for which GCC should tune the performance of the code. Permissible values for this option are: -@samp{generic}, @samp{cortex-a53}, @samp{cortex-a57}, @samp{thunderx}. +@samp{generic}, @samp{cortex-a53}, @samp{cortex-a57}, @samp{thunderx}, +@samp{xgene1}. Additionally, this option can specify that GCC should tune the performance of the code for a big.LITTLE system. The only permissible value is @@ -12357,7 +12358,8 @@ Permissible names are: @samp{arm2}, @samp{arm250}, @samp{marvell-pj4}, @samp{xscale}, @samp{iwmmxt}, @samp{iwmmxt2}, @samp{ep9312}, @samp{fa526}, @samp{fa626}, -@samp{fa606te}, @samp{fa626te}, @samp{fmp626}, @samp{fa726te}. +@samp{fa606te}, @samp{fa626te}, @samp{fmp626}, @samp{fa726te}, +@samp{xgene1}. Additionally, this option can specify that GCC should tune the performance of the code for a big.LITTLE system. Permissible names are: |