diff options
author | jakub <jakub@138bc75d-0d04-0410-961f-82ee72b054a4> | 2011-11-07 16:00:08 +0000 |
---|---|---|
committer | jakub <jakub@138bc75d-0d04-0410-961f-82ee72b054a4> | 2011-11-07 16:00:08 +0000 |
commit | 6296bd9601caec8b0c01944c5a06eb1fcfd216c6 (patch) | |
tree | c76018ce8a3bf4d0b9e9c7b74c7f46fd636d2477 /gcc/config | |
parent | 16dfb11221da8bb69bfcc5ca457cb6a6f145b03a (diff) | |
download | gcc-6296bd9601caec8b0c01944c5a06eb1fcfd216c6.tar.gz |
* config/i386/i386.c (ix86_expand_builtin): If gather mask
argument is known to have all high bits set, pass pc_rtx as
second argument to the expander instead of op0.
* config/i386/sse.md (*avx2_gathersi<mode>_2,
*avx2_gatherdi<mode>_2): New patterns.
* config/i386/avx2intrin.h (_mm256_i32gather_pd,
_mm256_i64gather_pd, _mm256_i32gather_ps): Set mask using
_mm256_cmp_pd with zero vector arguments and _CMP_EQ_OQ instead of
_mm256_set1_pd.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@181090 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc/config')
-rw-r--r-- | gcc/config/i386/avx2intrin.h | 6 | ||||
-rw-r--r-- | gcc/config/i386/i386.c | 65 | ||||
-rw-r--r-- | gcc/config/i386/sse.md | 44 |
3 files changed, 112 insertions, 3 deletions
diff --git a/gcc/config/i386/avx2intrin.h b/gcc/config/i386/avx2intrin.h index 3c8f3600d68..12ed05fe029 100644 --- a/gcc/config/i386/avx2intrin.h +++ b/gcc/config/i386/avx2intrin.h @@ -1252,7 +1252,7 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_i32gather_pd (double const *base, __m128i index, const int scale) { __v4df src = _mm256_setzero_pd (); - __v4df mask = _mm256_set1_pd((double)(long long int) -1); + __v4df mask = _mm256_cmp_pd (src, src, _CMP_EQ_OQ); return (__m256d) __builtin_ia32_gathersiv4df (src, base, @@ -1304,7 +1304,7 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_i64gather_pd (double const *base, __m256i index, const int scale) { __v4df src = _mm256_setzero_pd (); - __v4df mask = _mm256_set1_pd((double)(long long int) -1); + __v4df mask = _mm256_cmp_pd (src, src, _CMP_EQ_OQ); return (__m256d) __builtin_ia32_gatherdiv4df (src, base, @@ -1356,7 +1356,7 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_i32gather_ps (float const *base, __m256i index, const int scale) { __v8sf src = _mm256_setzero_ps (); - __v8sf mask = _mm256_set1_ps((float)(int) -1); + __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ); return (__m256) __builtin_ia32_gathersiv8sf (src, base, diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 4d7d2cf1d4a..4461fbba82d 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -29087,6 +29087,71 @@ rdrand_step: error ("last argument must be scale 1, 2, 4, 8"); return const0_rtx; } + + /* Optimize. If mask is known to have all high bits set, + replace op0 with pc_rtx to signal that the instruction + overwrites the whole destination and doesn't use its + previous contents. */ + if (optimize) + { + if (TREE_CODE (arg3) == VECTOR_CST) + { + tree elt; + unsigned int negative = 0; + for (elt = TREE_VECTOR_CST_ELTS (arg3); + elt; elt = TREE_CHAIN (elt)) + { + tree cst = TREE_VALUE (elt); + if (TREE_CODE (cst) == INTEGER_CST + && tree_int_cst_sign_bit (cst)) + negative++; + else if (TREE_CODE (cst) == REAL_CST + && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst))) + negative++; + } + if (negative == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3))) + op0 = pc_rtx; + } + else if (TREE_CODE (arg3) == SSA_NAME) + { + /* Recognize also when mask is like: + __v2df src = _mm_setzero_pd (); + __v2df mask = _mm_cmpeq_pd (src, src); + or + __v8sf src = _mm256_setzero_ps (); + __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ); + as that is a cheaper way to load all ones into + a register than having to load a constant from + memory. */ + gimple def_stmt = SSA_NAME_DEF_STMT (arg3); + if (is_gimple_call (def_stmt)) + { + tree fndecl = gimple_call_fndecl (def_stmt); + if (fndecl + && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD) + switch ((unsigned int) DECL_FUNCTION_CODE (fndecl)) + { + case IX86_BUILTIN_CMPPD: + case IX86_BUILTIN_CMPPS: + case IX86_BUILTIN_CMPPD256: + case IX86_BUILTIN_CMPPS256: + if (!integer_zerop (gimple_call_arg (def_stmt, 2))) + break; + /* FALLTHRU */ + case IX86_BUILTIN_CMPEQPD: + case IX86_BUILTIN_CMPEQPS: + if (initializer_zerop (gimple_call_arg (def_stmt, 0)) + && initializer_zerop (gimple_call_arg (def_stmt, + 1))) + op0 = pc_rtx; + break; + default: + break; + } + } + } + } + pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4); if (! pat) return const0_rtx; diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index e3de9ecdd24..688b5be9648 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -12567,6 +12567,26 @@ (set_attr "prefix" "vex") (set_attr "mode" "<sseinsnmode>")]) +(define_insn "*avx2_gathersi<mode>_2" + [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x") + (unspec:VEC_GATHER_MODE + [(pc) + (match_operator:<ssescalarmode> 6 "vsib_mem_operator" + [(unspec:P + [(match_operand:P 2 "vsib_address_operand" "p") + (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "x") + (match_operand:SI 5 "const1248_operand" "n")] + UNSPEC_VSIBADDR)]) + (mem:BLK (scratch)) + (match_operand:VEC_GATHER_MODE 4 "register_operand" "1")] + UNSPEC_GATHER)) + (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))] + "TARGET_AVX2" + "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %6, %0|%0, %6, %1}" + [(set_attr "type" "ssemov") + (set_attr "prefix" "vex") + (set_attr "mode" "<sseinsnmode>")]) + (define_expand "avx2_gatherdi<mode>" [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "") (unspec:VEC_GATHER_MODE @@ -12608,3 +12628,27 @@ [(set_attr "type" "ssemov") (set_attr "prefix" "vex") (set_attr "mode" "<sseinsnmode>")]) + +(define_insn "*avx2_gatherdi<mode>_2" + [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x") + (unspec:VEC_GATHER_MODE + [(pc) + (match_operator:<ssescalarmode> 6 "vsib_mem_operator" + [(unspec:P + [(match_operand:P 2 "vsib_address_operand" "p") + (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x") + (match_operand:SI 5 "const1248_operand" "n")] + UNSPEC_VSIBADDR)]) + (mem:BLK (scratch)) + (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")] + UNSPEC_GATHER)) + (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))] + "TARGET_AVX2" +{ + if (<MODE>mode != <VEC_GATHER_SRCDI>mode) + return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %x0|%x0, %6, %4}"; + return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}"; +} + [(set_attr "type" "ssemov") + (set_attr "prefix" "vex") + (set_attr "mode" "<sseinsnmode>")]) |