diff options
author | H.J. Lu <hjl.tools@gmail.com> | 2016-03-05 07:17:09 -0800 |
---|---|---|
committer | H.J. Lu <hjl.tools@gmail.com> | 2016-08-08 08:48:50 -0700 |
commit | ed753f050e3162f0f2ec16801a630581261d7274 (patch) | |
tree | 344d197831a31ed73afdcba9d889a02da204563a | |
parent | 095ebaf251b15f1c1fd942aa0394c019d5948000 (diff) | |
download | gcc-ed753f050e3162f0f2ec16801a630581261d7274.tar.gz |
Update i386 piecewise move and store
We can use TImode/OImode/XImode integers for piecewise move and store.
When a vector register is used for piecewise move and store, we don't
increase stack_alignment_needed, since a vector register spill isn't
required for piecewise move and store. Since stack_realign_needed is
set to true by checking stack_alignment_estimated, which is set by pseudo
vector register usage, we also need to check stack_realign_needed in
order to eliminate the frame pointer.
* config/i386/i386.c (ix86_finalize_stack_realign_flags): Also
check stack_realign_needed for stack realignment.
(ix86_legitimate_constant_p): Always allow CONST_WIDE_INT smaller
than the largest integer supported by vector register.
* config/i386/i386.h (MOVE_MAX): Set to 64.
(MOVE_MAX_PIECES): Set to bytes of the largest integer supported
by vector register.
(STORE_MAX_PIECES): New.
-rw-r--r-- | gcc/config/i386/i386.c | 19 | ||||
-rw-r--r-- | gcc/config/i386/i386.h | 25 |
2 files changed, 39 insertions, 5 deletions
```diff
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 60aa992c68e..a07f9d56b82 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -13286,8 +13286,15 @@ ix86_finalize_stack_realign_flags (void)
   /* If the only reason for frame_pointer_needed is that we conservatively
      assumed stack realignment might be needed, but in the end nothing that
      needed the stack alignment had been spilled, clear frame_pointer_needed
-     and say we don't need stack realignment.  */
-  if (stack_realign
+     and say we don't need stack realignment.
+
+     When vector register is used for piecewise move and store, we don't
+     increase stack_alignment_needed as there is no register spill for
+     piecewise move and store.  Since stack_realign_needed is set to true
+     by checking stack_alignment_estimated which is updated by pseudo
+     vector register usage, we also need to check stack_realign_needed to
+     eliminate frame pointer.  */
+  if ((stack_realign || crtl->stack_realign_needed)
       && frame_pointer_needed
       && crtl->is_leaf
       && flag_omit_frame_pointer
@@ -15228,7 +15235,13 @@ ix86_legitimate_constant_p (machine_mode mode, rtx x)
           /* FALLTHRU */
         case OImode:
         case XImode:
-          if (!standard_sse_constant_p (x, mode))
+          if (!standard_sse_constant_p (x, mode)
+              && GET_MODE_SIZE (TARGET_AVX512F
+                                ? XImode
+                                : (TARGET_AVX
+                                   ? OImode
+                                   : (TARGET_SSE2
+                                      ? TImode : DImode))) < GET_MODE_SIZE (mode))
             return false;
         default:
           break;
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 9b662648f7f..b8bd54a6728 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -1946,12 +1946,33 @@ typedef struct ix86_args {
 
 /* Max number of bytes we can move from memory to memory
    in one reasonably fast instruction.  */
-#define MOVE_MAX 16
+#define MOVE_MAX 64
 
 /* MOVE_MAX_PIECES is the number of bytes at a time which we can
    move efficiently, as opposed to MOVE_MAX which is the maximum
    number of bytes we can move with a single instruction.  */
-#define MOVE_MAX_PIECES UNITS_PER_WORD
+#define MOVE_MAX_PIECES \
+  (TARGET_AVX512F \
+   ? 64 \
+   : (TARGET_AVX \
+      ? ((!TARGET_AVX256_SPLIT_UNALIGNED_LOAD \
+          && !TARGET_AVX256_SPLIT_UNALIGNED_STORE) ? 32 : 16) \
+      : ((TARGET_SSE2 \
+          && TARGET_SSE_UNALIGNED_LOAD_OPTIMAL \
+          && TARGET_SSE_UNALIGNED_STORE_OPTIMAL) \
+         ? 16 : UNITS_PER_WORD)))
+
+/* STORE_MAX_PIECES is the number of bytes at a time that we can
+   store efficiently.  */
+#define STORE_MAX_PIECES \
+  (TARGET_AVX512F \
+   ? 64 \
+   : (TARGET_AVX \
+      ? ((!TARGET_AVX256_SPLIT_UNALIGNED_STORE \
+          && !TARGET_AVX256_SPLIT_UNALIGNED_STORE) ? 32 : 16) \
+      : ((TARGET_SSE2 \
+          && TARGET_SSE_UNALIGNED_STORE_OPTIMAL) \
+         ? 16 : UNITS_PER_WORD)))
 
 /* If a memory-to-memory move would take MOVE_RATIO or more simple
    move-instruction pairs, we will do a movmem or libcall instead.
```