From 08fded5fcfa202f1db306843df19bd9045a77cec Mon Sep 17 00:00:00 2001
From: ktkachov
Date: Thu, 1 Sep 2016 09:03:52 +0000
Subject: [AArch64] Add ANDS pattern for CMP+ZERO_EXTEND

	* config/aarch64/aarch64.md (*ands_compare0): New pattern.
	* config/aarch64/aarch64.c (aarch64_select_cc_mode): Return
	CC_NZmode for comparisons of integer ZERO_EXTEND against zero.
	* gcc.target/aarch64/ands_3.c: New test.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@239919 138bc75d-0d04-0410-961f-82ee72b054a4
---
 gcc/config/aarch64/aarch64.c | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'gcc/config/aarch64/aarch64.c')

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 3e663eb5f13..e813d66b40a 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -4264,6 +4264,14 @@ aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
       && (GET_MODE (x) == HImode || GET_MODE (x) == QImode))
     return CC_NZmode;
 
+  /* Similarly, comparisons of zero_extends from shorter modes can
+     be performed using an ANDS with an immediate mask.  */
+  if (y == const0_rtx && GET_CODE (x) == ZERO_EXTEND
+      && (GET_MODE (x) == SImode || GET_MODE (x) == DImode)
+      && (GET_MODE (XEXP (x, 0)) == HImode || GET_MODE (XEXP (x, 0)) == QImode)
+      && (code == EQ || code == NE))
+    return CC_NZmode;
+
   if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
       && y == const0_rtx
       && (code == EQ || code == NE || code == LT || code == GE)
-- cgit v1.2.1

From 156b069e85e47a97b3c4193584e9385ee500ad29 Mon Sep 17 00:00:00 2001
From: wilco
Date: Thu, 1 Sep 2016 11:34:49 +0000
Subject: This patch adds the legitimize_address_displacement hook so that
 stack accesses with large offsets are split into a more efficient sequence.

Unaligned and TI/TFmode accesses use a 256-byte range, byte and halfword
accesses use a 4KB range, and wider accesses use a 16KB range, to maximise
the available addressing range and increase opportunities to share the
base address.

int f(int x)
{
  int arr[8192];
  arr[4096] = 0;
  arr[6000] = 0;
  arr[7000] = 0;
  arr[8191] = 0;
  return arr[x];
}

Now generates:

	sub	sp, sp, #32768
	add	x1, sp, 16384
	str	wzr, [x1]
	str	wzr, [x1, 7616]
	str	wzr, [x1, 11616]
	str	wzr, [x1, 16380]
	ldr	w0, [sp, w0, sxtw 2]
	add	sp, sp, 32768
	ret

gcc/
	* config/aarch64/aarch64.c (aarch64_legitimize_address_displacement):
	New function.
	(TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT): Define.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@239923 138bc75d-0d04-0410-961f-82ee72b054a4
---
 gcc/config/aarch64/aarch64.c | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

(limited to 'gcc/config/aarch64/aarch64.c')

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index e813d66b40a..5efad462f11 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -4191,6 +4191,24 @@ aarch64_legitimate_address_p (machine_mode mode, rtx x,
   return aarch64_classify_address (&addr, x, mode, outer_code, strict_p);
 }
 
+/* Split an out-of-range address displacement into a base and offset.
+   Use 4KB range for 1- and 2-byte accesses and a 16KB range otherwise
+   to increase opportunities for sharing the base address of different sizes.
+   For TI/TFmode and unaligned accesses use a 256-byte range.  */
+static bool
+aarch64_legitimize_address_displacement (rtx *disp, rtx *off, machine_mode mode)
+{
+  HOST_WIDE_INT mask = GET_MODE_SIZE (mode) < 4 ? 0xfff : 0x3fff;
+
+  if (mode == TImode || mode == TFmode ||
+      (INTVAL (*disp) & (GET_MODE_SIZE (mode) - 1)) != 0)
+    mask = 0xff;
+
+  *off = GEN_INT (INTVAL (*disp) & ~mask);
+  *disp = GEN_INT (INTVAL (*disp) & mask);
+  return true;
+}
+
 /* Return TRUE if rtx X is immediate constant 0.0 */
 bool
 aarch64_float_const_zero_rtx_p (rtx x)
@@ -14135,6 +14153,10 @@ aarch64_optab_supported_p (int op, machine_mode mode1, machine_mode,
 #undef TARGET_LEGITIMATE_CONSTANT_P
 #define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p
 
+#undef TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT
+#define TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT \
+  aarch64_legitimize_address_displacement
+
 #undef TARGET_LIBGCC_CMP_RETURN_MODE
 #define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode
-- cgit v1.2.1
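
The arithmetic behind the new hook can be checked by hand.  The standalone
sketch below is illustrative only: split_displacement and its
mode_size/aligned/is_ti_tf parameters are invented names, not GCC code.  It
reproduces the mask selection from the hunk above and shows how the store
offsets from the f() example land relative to the shared base at sp + 16384.

#include <stdio.h>

/* Hypothetical stand-in for the hook above: mode_size is the access size
   in bytes, aligned says whether the displacement is a multiple of that
   size, and is_ti_tf marks TImode/TFmode accesses.  */
static void
split_displacement (long disp, int mode_size, int aligned, int is_ti_tf)
{
  long mask = mode_size < 4 ? 0xfff : 0x3fff;	/* 4KB or 16KB range.  */

  if (is_ti_tf || !aligned)
    mask = 0xff;				/* 256-byte range.  */

  printf ("disp %ld -> base %ld + offset %ld\n",
	  disp, disp & ~mask, disp & mask);
}

int
main (void)
{
  split_displacement (24000, 4, 1, 0);	/* arr[6000]  */
  split_displacement (28000, 4, 1, 0);	/* arr[7000]  */
  split_displacement (32764, 4, 1, 0);	/* arr[8191]  */
  return 0;
}

Running it prints a base of 16384 with offsets 7616, 11616 and 16380, which
match the displacements of the str instructions in the generated code above.
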
From 5ca92351fb4b3df9d47260200d52af10e2fbd49e Mon Sep 17 00:00:00 2001
From: wilco
Date: Wed, 7 Sep 2016 14:56:59 +0000
Subject: Improve aarch64_legitimize_address - avoid splitting the offset if
 it is supported.

When we do split, take the mode size into account.  BLKmode falls into the
unaligned case but should be treated like LDP/STP.  This improves codesize
slightly due to fewer base address calculations:

int f(int *p)
{
  return p[5000] + p[7000];
}

Now generates:

f:
	add	x0, x0, 16384
	ldr	w1, [x0, 3616]
	ldr	w0, [x0, 11616]
	add	w0, w1, w0
	ret

instead of:

f:
	add	x1, x0, 16384
	add	x0, x0, 24576
	ldr	w1, [x1, 3616]
	ldr	w0, [x0, 3424]
	add	w0, w1, w0
	ret

gcc/
	* config/aarch64/aarch64.c (aarch64_legitimize_address):
	Avoid use of base_offset if offset already in range.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@240026 138bc75d-0d04-0410-961f-82ee72b054a4
---
 gcc/config/aarch64/aarch64.c | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

(limited to 'gcc/config/aarch64/aarch64.c')

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 5efad462f11..2be750e7de4 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -5082,9 +5082,19 @@ aarch64_legitimize_address (rtx x, rtx /* orig_x */, machine_mode mode)
       /* For offsets aren't a multiple of the access size, the limit is
	 -256...255.  */
       else if (offset & (GET_MODE_SIZE (mode) - 1))
-	base_offset = (offset + 0x100) & ~0x1ff;
+	{
+	  base_offset = (offset + 0x100) & ~0x1ff;
+
+	  /* BLKmode typically uses LDP of X-registers.  */
+	  if (mode == BLKmode)
+	    base_offset = (offset + 512) & ~0x3ff;
+	}
+      /* Small negative offsets are supported.  */
+      else if (IN_RANGE (offset, -256, 0))
+	base_offset = 0;
+      /* Use 12-bit offset by access size.  */
       else
-	base_offset = offset & ~0xfff;
+	base_offset = offset & (~0xfff * GET_MODE_SIZE (mode));
 
       if (base_offset != 0)
	{
-- cgit v1.2.1
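
The key change is the last added line of the hunk: the 4KB mask is scaled by
the access size.  A quick standalone check (a sketch with made-up names, not
part of the patch) shows why both loads in f() can then share one base
register.

#include <stdio.h>

int
main (void)
{
  /* Byte offsets of p[5000] and p[7000] for 4-byte ints.  */
  long offsets[] = { 5000 * 4, 7000 * 4 };
  long old_mask = ~0xfffL;	/* 4KB window: separate bases.  */
  long new_mask = ~0xfffL * 4;	/* == ~0x3fff: one 16KB window.  */

  for (int i = 0; i < 2; i++)
    printf ("offset %ld: old base %ld, new base %ld (remaining %ld)\n",
	    offsets[i], offsets[i] & old_mask,
	    offsets[i] & new_mask, offsets[i] & ~new_mask);
  return 0;
}

For offsets 20000 and 28000 this prints a shared new base of 16384 with
remainders 3616 and 11616, whereas the old 4KB mask put the second load at a
separate base of 24576 - exactly the difference between the two code
sequences shown above.
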
From c72e91d30c64778fd89a2e3f351f6a9996b2a7af Mon Sep 17 00:00:00 2001
From: pinskia
Date: Mon, 12 Sep 2016 21:30:33 +0000
Subject: Add tuning of ldpw for ThunderX.

2016-09-12  Andrew Pinski

	* config/aarch64/aarch64-tuning-flags.def (SLOW_UNALIGNED_LDPW):
	New tuning option.
	* config/aarch64/aarch64.c (thunderx_tunings): Enable
	AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW.
	(aarch64_operands_ok_for_ldpstp): Return false if
	AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW and the mode was SImode
	and the alignment is less than 8 byte.
	(aarch64_operands_adjust_ok_for_ldpstp): Likewise.

2016-09-12  Andrew Pinski

	* gcc.target/aarch64/thunderxloadpair.c: New testcase.
	* gcc.target/aarch64/thunderxnoloadpair.c: New testcase.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@240102 138bc75d-0d04-0410-961f-82ee72b054a4
---
 gcc/config/aarch64/aarch64.c | 20 +++++++++++++++++++-
 1 file changed, 19 insertions(+), 1 deletion(-)

(limited to 'gcc/config/aarch64/aarch64.c')

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 2be750e7de4..3fcfaa88290 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -712,7 +712,7 @@ static const struct tune_params thunderx_tunings =
   0,	/* max_case_values.  */
   0,	/* cache_line_size.  */
   tune_params::AUTOPREFETCHER_OFF,	/* autoprefetcher_model.  */
-  (AARCH64_EXTRA_TUNE_NONE)	/* tune_flags.  */
+  (AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW)	/* tune_flags.  */
 };
 
 static const struct tune_params xgene1_tunings =
@@ -13629,6 +13629,15 @@ aarch64_operands_ok_for_ldpstp (rtx *operands, bool load,
   if (MEM_VOLATILE_P (mem_1) || MEM_VOLATILE_P (mem_2))
     return false;
 
+  /* If we have SImode and slow unaligned ldp,
+     check the alignment to be at least 8 byte.  */
+  if (mode == SImode
+      && (aarch64_tune_params.extra_tuning_flags
+	  & AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW)
+      && !optimize_size
+      && MEM_ALIGN (mem_1) < 8 * BITS_PER_UNIT)
+    return false;
+
   /* Check if the addresses are in the form of [base+offset].  */
   extract_base_offset_in_addr (mem_1, &base_1, &offset_1);
   if (base_1 == NULL_RTX || offset_1 == NULL_RTX)
@@ -13788,6 +13797,15 @@ aarch64_operands_adjust_ok_for_ldpstp (rtx *operands, bool load,
       return false;
     }
 
+  /* If we have SImode and slow unaligned ldp,
+     check the alignment to be at least 8 byte.  */
+  if (mode == SImode
+      && (aarch64_tune_params.extra_tuning_flags
+	  & AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW)
+      && !optimize_size
+      && MEM_ALIGN (mem_1) < 8 * BITS_PER_UNIT)
+    return false;
+
   if (REG_P (reg_1) && FP_REGNUM_P (REGNO (reg_1)))
     rclass_1 = FP_REGS;
   else
-- cgit v1.2.1

From 0f07909ca36148e0d48a8dcf24ec021a2736a25f Mon Sep 17 00:00:00 2001
From: segher
Date: Wed, 14 Sep 2016 10:49:42 +0000
Subject: Delete TARGET_LRA_P from those targets that set it to "true"

A few targets already always want LRA; those then do not need to override
the default anymore.

2016-09-14  Segher Boessenkool

	* config/aarch64/aarch64.c (TARGET_LRA_P): Delete macro.
	* config/arm/arm.c (TARGET_LRA_P): Delete macro.
	* config/i386/i386.c (TARGET_LRA_P): Delete macro.
	* config/nds32/nds32.c (TARGET_LRA_P): Delete macro.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@240132 138bc75d-0d04-0410-961f-82ee72b054a4
---
 gcc/config/aarch64/aarch64.c | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'gcc/config/aarch64/aarch64.c')

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 3fcfaa88290..6078b163548 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -14188,9 +14188,6 @@ aarch64_optab_supported_p (int op, machine_mode mode1, machine_mode,
 #undef TARGET_LIBGCC_CMP_RETURN_MODE
 #define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode
 
-#undef TARGET_LRA_P
-#define TARGET_LRA_P hook_bool_void_true
-
 #undef TARGET_MANGLE_TYPE
 #define TARGET_MANGLE_TYPE aarch64_mangle_type
-- cgit v1.2.1
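
Returning to the ThunderX LDPW tuning (r240102 above), the effect of the new
MEM_ALIGN check is easiest to see on a pair of adjacent 32-bit loads.  The C
sketch below is illustrative only: it is not the thunderxloadpair.c or
thunderxnoloadpair.c testcase added by that commit, and the struct and
function names are made up.  Compiled with -O2 -mcpu=thunderx, the intent of
the tuning is:

/* Illustrative sketch, not the actual testcases from r240102.  */

struct aligned_pair
{
  int a, b;
} __attribute__ ((aligned (8)));

struct word_aligned_pair
{
  int a, b;			/* only 4-byte alignment is known */
};

int
sum_aligned (struct aligned_pair *p)
{
  /* MEM_ALIGN of the first load is 64 bits, so the two loads may
     still be combined into a single ldp of w-registers.  */
  return p->a + p->b;
}

int
sum_word_aligned (struct word_aligned_pair *p)
{
  /* Only 32-bit alignment is known, so with SLOW_UNALIGNED_LDPW the
     checks above reject the pair and two ldr instructions remain.  */
  return p->a + p->b;
}
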