diff options
author | kyukhin <kyukhin@138bc75d-0d04-0410-961f-82ee72b054a4> | 2014-06-18 07:46:18 +0000 |
---|---|---|
committer | kyukhin <kyukhin@138bc75d-0d04-0410-961f-82ee72b054a4> | 2014-06-18 07:46:18 +0000 |
commit | 926f7a02cf54c290980c2eb8b77974337154bb6d (patch) | |
tree | 28cbde5f2df4453f929eea8f65b8e6be0303d012 /gcc/config/i386 | |
parent | 691447aba6b3f8f3ea8402a35e7c53e2dbb9a6bd (diff) | |
download | gcc-926f7a02cf54c290980c2eb8b77974337154bb6d.tar.gz |
gcc/
* config/i386/i386.c (ix86_reassociation_width): Add alternative for
vector case.
* config/i386/i386.h (TARGET_VECTOR_PARALLEL_EXECUTION): New.
* config/i386/x86-tune.def (X86_TUNE_VECTOR_PARALLEL_EXECUTION): New.
* tree-vect-data-refs.c (vect_shift_permute_load_chain): New.
Introduces alternative way of loads group permutaions.
(vect_transform_grouped_load): Try alternative way of permutations.
gcc/testsuite/
PR tree-optimization/52252
* gcc.target/i386/pr52252-atom.c: Test on loads group of size 3.
* gcc.target/i386/pr52252-core.c: Ditto.
PR tree-optimization/61403
* gcc.target/i386/pr61403.c: Test on loads and stores group of size 3.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@211769 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc/config/i386')
-rw-r--r-- | gcc/config/i386/i386.c | 10 | ||||
-rw-r--r-- | gcc/config/i386/i386.h | 2 | ||||
-rw-r--r-- | gcc/config/i386/x86-tune.def | 5 |
3 files changed, 17 insertions, 0 deletions
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 77d54e5bcb7..8046c67c555 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -46429,6 +46429,16 @@ ix86_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED, { int res = 1; + /* Vector part. */ + if (VECTOR_MODE_P (mode)) + { + if (TARGET_VECTOR_PARALLEL_EXECUTION) + return 2; + else + return 1; + } + + /* Scalar part. */ if (INTEGRAL_MODE_P (mode) && TARGET_REASSOC_INT_TO_PARALLEL) res = 2; else if (FLOAT_MODE_P (mode) && TARGET_REASSOC_FP_TO_PARALLEL) diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index a4009d3a83b..9e3ef9424c3 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -433,6 +433,8 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST]; ix86_tune_features[X86_TUNE_USE_VECTOR_CONVERTS] #define TARGET_SLOW_PSHUFB \ ix86_tune_features[X86_TUNE_SLOW_PSHUFB] +#define TARGET_VECTOR_PARALLEL_EXECUTION \ + ix86_tune_features[X86_TUNE_VECTOR_PARALLEL_EXECUTION] #define TARGET_FUSE_CMP_AND_BRANCH_32 \ ix86_tune_features[X86_TUNE_FUSE_CMP_AND_BRANCH_32] #define TARGET_FUSE_CMP_AND_BRANCH_64 \ diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def index 9b0ff360ab3..cb44dc3120c 100644 --- a/gcc/config/i386/x86-tune.def +++ b/gcc/config/i386/x86-tune.def @@ -390,6 +390,11 @@ DEF_TUNE (X86_TUNE_USE_VECTOR_CONVERTS, "use_vector_converts", m_AMDFAM10) DEF_TUNE (X86_TUNE_SLOW_PSHUFB, "slow_pshufb", m_BONNELL | m_SILVERMONT | m_INTEL) +/* X86_TUNE_VECTOR_PARALLEL_EXECUTION: Indicates tunings with ability to + execute 2 or more vector instructions in parallel. */ +DEF_TUNE (X86_TUNE_VECTOR_PARALLEL_EXECUTION, "vec_parallel", + m_NEHALEM | m_SANDYBRIDGE | m_HASWELL) + /*****************************************************************************/ /* AVX instruction selection tuning (some of SSE flags affects AVX, too) */ /*****************************************************************************/ |