diff options
author | jye2 <jye2@138bc75d-0d04-0410-961f-82ee72b054a4> | 2013-11-14 08:38:54 +0000 |
---|---|---|
committer | jye2 <jye2@138bc75d-0d04-0410-961f-82ee72b054a4> | 2013-11-14 08:38:54 +0000 |
commit | ccfff6a89aab4ea2cd04208d8bfa9aa53c09bd71 (patch) | |
tree | f49725ebe89f668e21e577c309ce2d7ad36ddbc7 | |
parent | 958941b7172fd3114830dbead38522cf34b36a4a (diff) | |
download | gcc-ccfff6a89aab4ea2cd04208d8bfa9aa53c09bd71.tar.gz |
2013-11-14 Julian Brown <julian@codesourcery.com>
Joey Ye <joey.ye@arm.com>
* config/arm/arm.c (arm_cortex_m_branch_cost): New.
(arm_v7m_tune): New.
(arm_slowmul_tune, arm_fastmul_tune,
arm_strongarm_tune, arm_9e_tune, arm_v6t2_tune,
arm_cortex_tune, arm_cortex_a15_tune,
arm_cortex_a5_tune, arm_v6m_tune): Add comments
for Sched adj cost.
* config/arm/arm-cores.def (cortex-m4, cortex-m3):
Use arm_v7m_tune.
testsuite:
2013-11-14 Joey Ye <joey.ye@arm.com>
* gcc.dg/tree-ssa/forwprop-28.c: Disable for cortex_m.
* gcc.dg/tree-ssa/vrp47.c: Likewise.
* gcc.dg/tree-ssa/vrp87.c: Likewise.
* gcc.dg/tree-ssa/ssa-dom-thread-4.c: Ignore for cortex_m.
* gcc.dg/tree-ssa/ssa-vrp-thread-1.c: Likewise.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@204778 138bc75d-0d04-0410-961f-82ee72b054a4
-rw-r--r-- | gcc/ChangeLog | 13 | ||||
-rw-r--r-- | gcc/config/arm/arm-cores.def | 4 | ||||
-rw-r--r-- | gcc/config/arm/arm.c | 56 | ||||
-rw-r--r-- | gcc/testsuite/ChangeLog | 8 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/tree-ssa/forwprop-28.c | 4 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-4.c | 4 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/tree-ssa/ssa-vrp-thread-1.c | 4 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/tree-ssa/vrp47.c | 4 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/tree-ssa/vrp87.c | 4 |
9 files changed, 85 insertions, 16 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 1a591cb8437..636cdbad92b 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,16 @@ +2013-11-14 Julian Brown <julian@codesourcery.com> + Joey Ye <joey.ye@arm.com> + + * config/arm/arm.c (arm_cortex_m_branch_cost): New. + (arm_v7m_tune): New. + (arm_slowmul_tune, arm_fastmul_tune, + arm_strongarm_tune, arm_9e_tune, arm_v6t2_tune, + arm_cortex_tune, arm_cortex_a15_tune, + arm_cortex_a5_tune, arm_v6m_tune): Add comments + for Sched adj cost. + * config/arm/arm-cores.def (cortex-m4, cortex-m3): + Use arm_v7m_tune. + 2013-11-14 Kirill Yukhin <kirill.yukhin@intel.com> PR target/57491 diff --git a/gcc/config/arm/arm-cores.def b/gcc/config/arm/arm-cores.def index 79e2e87b72b..d664e736424 100644 --- a/gcc/config/arm/arm-cores.def +++ b/gcc/config/arm/arm-cores.def @@ -134,8 +134,8 @@ ARM_CORE("cortex-r4", cortexr4, 7R, FL_LDSCHED, cortex) ARM_CORE("cortex-r4f", cortexr4f, 7R, FL_LDSCHED, cortex) ARM_CORE("cortex-r5", cortexr5, 7R, FL_LDSCHED | FL_ARM_DIV, cortex) ARM_CORE("cortex-r7", cortexr7, 7R, FL_LDSCHED | FL_ARM_DIV, cortex) -ARM_CORE("cortex-m4", cortexm4, 7EM, FL_LDSCHED, cortex) -ARM_CORE("cortex-m3", cortexm3, 7M, FL_LDSCHED, cortex) +ARM_CORE("cortex-m4", cortexm4, 7EM, FL_LDSCHED, v7m) +ARM_CORE("cortex-m3", cortexm3, 7M, FL_LDSCHED, v7m) ARM_CORE("cortex-m1", cortexm1, 6M, FL_LDSCHED, v6m) ARM_CORE("cortex-m0", cortexm0, 6M, FL_LDSCHED, v6m) ARM_CORE("cortex-m0plus", cortexm0plus, 6M, FL_LDSCHED, v6m) diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index d3734c33f0b..64c6b4965ff 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -266,6 +266,7 @@ static reg_class_t arm_preferred_rename_class (reg_class_t rclass); static unsigned int arm_autovectorize_vector_sizes (void); static int arm_default_branch_cost (bool, bool); static int arm_cortex_a5_branch_cost (bool, bool); +static int arm_cortex_m_branch_cost (bool, bool); static bool arm_vectorize_vec_perm_const_ok (enum machine_mode vmode, 
const unsigned char *sel); @@ -1260,7 +1261,7 @@ const struct tune_params arm_slowmul_tune = { arm_slowmul_rtx_costs, NULL, - NULL, + NULL, /* Sched adj cost. */ 3, /* Constant limit. */ 5, /* Max cond insns. */ ARM_PREFETCH_NOT_BENEFICIAL, @@ -1276,7 +1277,7 @@ const struct tune_params arm_fastmul_tune = { arm_fastmul_rtx_costs, NULL, - NULL, + NULL, /* Sched adj cost. */ 1, /* Constant limit. */ 5, /* Max cond insns. */ ARM_PREFETCH_NOT_BENEFICIAL, @@ -1295,7 +1296,7 @@ const struct tune_params arm_strongarm_tune = { arm_fastmul_rtx_costs, NULL, - NULL, + NULL, /* Sched adj cost. */ 1, /* Constant limit. */ 3, /* Max cond insns. */ ARM_PREFETCH_NOT_BENEFICIAL, @@ -1327,7 +1328,7 @@ const struct tune_params arm_9e_tune = { arm_9e_rtx_costs, NULL, - NULL, + NULL, /* Sched adj cost. */ 1, /* Constant limit. */ 5, /* Max cond insns. */ ARM_PREFETCH_NOT_BENEFICIAL, @@ -1343,7 +1344,7 @@ const struct tune_params arm_v6t2_tune = { arm_9e_rtx_costs, NULL, - NULL, + NULL, /* Sched adj cost. */ 1, /* Constant limit. */ 5, /* Max cond insns. */ ARM_PREFETCH_NOT_BENEFICIAL, @@ -1360,7 +1361,7 @@ const struct tune_params arm_cortex_tune = { arm_9e_rtx_costs, &generic_extra_costs, - NULL, + NULL, /* Sched adj cost. */ 1, /* Constant limit. */ 5, /* Max cond insns. */ ARM_PREFETCH_NOT_BENEFICIAL, @@ -1392,7 +1393,7 @@ const struct tune_params arm_cortex_a15_tune = { arm_9e_rtx_costs, &cortexa15_extra_costs, - NULL, + NULL, /* Sched adj cost. */ 1, /* Constant limit. */ 2, /* Max cond insns. */ ARM_PREFETCH_NOT_BENEFICIAL, @@ -1411,7 +1412,7 @@ const struct tune_params arm_cortex_a5_tune = { arm_9e_rtx_costs, NULL, - NULL, + NULL, /* Sched adj cost. */ 1, /* Constant limit. */ 1, /* Max cond insns. */ ARM_PREFETCH_NOT_BENEFICIAL, @@ -1439,13 +1440,36 @@ const struct tune_params arm_cortex_a9_tune = false /* Prefer Neon for 64-bits bitops. */ }; +/* armv7m tuning. On Cortex-M4 cores for example, MOVW/MOVT take a single + cycle to execute each. 
An LDR from the constant pool also takes two cycles + to execute, but mildly increases pipelining opportunity (consecutive + loads/stores can be pipelined together, saving one cycle), and may also + improve icache utilisation. Hence we prefer the constant pool for such + processors. */ + +const struct tune_params arm_v7m_tune = +{ + arm_9e_rtx_costs, + &generic_extra_costs, + NULL, /* Sched adj cost. */ + 1, /* Constant limit. */ + 5, /* Max cond insns. */ + ARM_PREFETCH_NOT_BENEFICIAL, + true, /* Prefer constant pool. */ + arm_cortex_m_branch_cost, + false, /* Prefer LDRD/STRD. */ + {false, false}, /* Prefer non short circuit. */ + &arm_default_vec_cost, /* Vectorizer costs. */ + false /* Prefer Neon for 64-bits bitops. */ +}; + /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than arm_v6t2_tune. It is used for cortex-m0, cortex-m1 and cortex-m0plus. */ const struct tune_params arm_v6m_tune = { arm_9e_rtx_costs, NULL, - NULL, + NULL, /* Sched adj cost. */ 1, /* Constant limit. */ 5, /* Max cond insns. */ ARM_PREFETCH_NOT_BENEFICIAL, @@ -11241,6 +11265,20 @@ arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p) return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p); } +/* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles" + on Cortex-M4, where P varies from 1 to 3 according to some criteria), since + sequences of non-executed instructions in IT blocks probably take the same + amount of time as executed instructions (and the IT instruction itself takes + space in icache). This function was experimentally determined to give good + results on a popular embedded benchmark. */ + +static int +arm_cortex_m_branch_cost (bool speed_p, bool predictable_p) +{ + return (TARGET_32BIT && speed_p) ? 
1 + : arm_default_branch_cost (speed_p, predictable_p); +} + static bool fp_consts_inited = false; static REAL_VALUE_TYPE value_fp0; diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index ec8d2485acc..6d8e43a09ee 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,11 @@ +2013-11-14 Joey Ye <joey.ye@arm.com> + + * gcc.dg/tree-ssa/forwprop-28.c: Disable for cortex_m. + * gcc.dg/tree-ssa/vrp47.c: Likewise. + * gcc.dg/tree-ssa/vrp87.c: Likewise. + * gcc.dg/tree-ssa/ssa-dom-thread-4.c: Ingore for cortex_m. + * gcc.dg/tree-ssa/ssa-vrp-thread-1.c: Likewise. + 2013-11-14 Adam Butcher <adam@jessamine.co.uk> PR c++/58533 diff --git a/gcc/testsuite/gcc.dg/tree-ssa/forwprop-28.c b/gcc/testsuite/gcc.dg/tree-ssa/forwprop-28.c index 06b406fc690..1a4bf4a4444 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/forwprop-28.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/forwprop-28.c @@ -1,5 +1,9 @@ /* { dg-do compile { target { ! "m68k*-*-* mmix*-*-* mep*-*-* bfin*-*-* v850*-*-* picochip*-*-* moxie*-*-* cris*-*-* m32c*-*-* fr30*-*-* mcore*-*-* powerpc*-*-* xtensa*-*-* arc*-*-*"} } } */ /* { dg-options "-O2 -fdump-tree-forwprop1" } */ +/* Skip on ARM Cortex-M, where LOGICAL_OP_NON_SHORT_CIRCUIT is set to false, + leading to two conditional jumps when evaluating an && condition. Forwprop1 + is not able to optimize this. */ +/* { dg-skip-if "" { arm_cortex_m } } */ extern char *frob (void); extern _Bool testit (void); diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-4.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-4.c index fec3075618f..0e4797cbd9f 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-4.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-4.c @@ -59,9 +59,9 @@ bitmap_ior_and_compl (bitmap dst, const_bitmap a, const_bitmap b, code we missed the edge when the first conditional is false (b_elt is zero, which means the second conditional is always zero. 
*/ -/* ARM Cortex-M0 defined LOGICAL_OP_NON_SHORT_CIRCUIT to false, +/* ARM Cortex-M defined LOGICAL_OP_NON_SHORT_CIRCUIT to false, so skip below test. */ -/* { dg-final { scan-tree-dump-times "Threaded" 3 "dom1" { target { ! { { mips*-*-* avr-*-* arc*-*-* } || { arm_cortex_m && arm_thumb1 } } } } } } */ +/* { dg-final { scan-tree-dump-times "Threaded" 3 "dom1" { target { ! { { mips*-*-* avr-*-* arc*-*-* } || { arm_cortex_m } } } } } } */ /* MIPS defines LOGICAL_OP_NON_SHORT_CIRCUIT to 0, so we split both "a_elt || b_elt" and "b_elt && kill_elt" into two conditions each, rather than using "(var1 != 0) op (var2 != 0)". Also, as on other targets, diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-vrp-thread-1.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-vrp-thread-1.c index 9d9473e7f31..b498d8bd898 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-vrp-thread-1.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-vrp-thread-1.c @@ -26,6 +26,8 @@ build_omp_regions_1 (basic_block bb, struct omp_region *parent, oof (); } -/* { dg-final { scan-tree-dump-times "Threaded" 1 "vrp1" } } */ +/* ARM Cortex-M defined LOGICAL_OP_NON_SHORT_CIRCUIT to false, + so skip below test. */ +/* { dg-final { { scan-tree-dump-times "Threaded" 1 "vrp1" } || { arm_cortex_m } } } */ /* { dg-final { cleanup-tree-dump "vrp1" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/vrp47.c b/gcc/testsuite/gcc.dg/tree-ssa/vrp47.c index 74b520b5f54..5a09fa0f49d 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/vrp47.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/vrp47.c @@ -6,10 +6,10 @@ /* { dg-do compile { target { ! 
"mips*-*-* arc*-*-* s390*-*-* avr-*-* mn10300-*-*" } } } */ /* { dg-options "-O2 -fdump-tree-vrp1 -fdump-tree-dom1 -fdump-tree-vrp2" } */ /* { dg-additional-options "-march=i586" { target { { i?86-*-* x86_64-*-* } && ia32 } } } */ -/* Skip on ARM Cortex-M0, where LOGICAL_OP_NON_SHORT_CIRCUIT is set to false, +/* Skip on ARM Cortex-M, where LOGICAL_OP_NON_SHORT_CIRCUIT is set to false, leading to two conditional jumps when evaluating an && condition. VRP is not able to optimize this. */ -/* { dg-skip-if "" { arm_cortex_m && arm_thumb1} } */ +/* { dg-skip-if "" { arm_cortex_m } } */ int h(int x, int y) { diff --git a/gcc/testsuite/gcc.dg/tree-ssa/vrp87.c b/gcc/testsuite/gcc.dg/tree-ssa/vrp87.c index aa85191419e..9aff0a6c46f 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/vrp87.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/vrp87.c @@ -2,6 +2,10 @@ /* { dg-options "-O2 -fdump-tree-vrp2-details -fdump-tree-cddce2-details" } */ /* { dg-additional-options "-mbranch-cost=2" { target avr-*-* } } */ +/* Skip on ARM Cortex-M, where LOGICAL_OP_NON_SHORT_CIRCUIT is set to false, + leading to two conditional jumps when evaluating an && condition. VRP is + not able to optimize this. */ +/* { dg-skip-if "" { arm_cortex_m } } */ struct bitmap_head_def; typedef struct bitmap_head_def *bitmap; |