summaryrefslogtreecommitdiff
path: root/gcc/config/i386
diff options
context:
space:
mode:
authorRichard Sandiford <richard.sandiford@linaro.org>2018-01-02 18:26:27 +0000
committerRichard Sandiford <rsandifo@gcc.gnu.org>2018-01-02 18:26:27 +0000
commitf151c9e1414c00e300c9385bc9512c3d9a481296 (patch)
treecc430a054b11cef7b925002187dba5a721bf148c /gcc/config/i386
parent279b805713fd498afb7986698a2e3406bc947d87 (diff)
downloadgcc-f151c9e1414c00e300c9385bc9512c3d9a481296.tar.gz
Remove vec_perm_const optab
One of the changes needed for variable-length VEC_PERM_EXPRs -- and for long fixed-length VEC_PERM_EXPRs -- is the ability to use constant selectors that wouldn't fit in the vectors being permuted. E.g. a permute on two V256QIs can't be done using a V256QI selector. At the moment constant permutes use two interfaces: targetm.vectorizer.vec_perm_const_ok for testing whether a permute is valid and the vec_perm_const optab for actually emitting the permute. The former gets passed a vec<> selector and the latter an rtx selector. Most ports share a lot of code between the hook and the optab, with a wrapper function for each interface. We could try to keep that interface and require ports to define wider vector modes that could be attached to the CONST_VECTOR (e.g. V256HI or V256SI in the example above). But building a CONST_VECTOR rtx seems a bit pointless here, since the expand code only creates the CONST_VECTOR in order to call the optab, and the first thing the target does is take the CONST_VECTOR apart again. The easiest approach therefore seemed to be to remove the optab and reuse the target hook to emit the code. One potential drawback is that it's no longer possible to use match_operand predicates to force operands into the required form, but in practice all targets want register operands anyway. The patch also changes vec_perm_indices into a class that provides some simple routines for handling permutations. A later patch will flesh this out and get rid of auto_vec_perm_indices, but I didn't want to do all that in this patch and make it more complicated than it already is. 2018-01-02 Richard Sandiford <richard.sandiford@linaro.org> gcc/ * Makefile.in (OBJS): Add vec-perm-indices.o. * vec-perm-indices.h: New file. * vec-perm-indices.c: Likewise. * target.h (vec_perm_indices): Replace with a forward class declaration. (auto_vec_perm_indices): Move to vec-perm-indices.h. * optabs.h: Include vec-perm-indices.h. (expand_vec_perm): Delete. 
(selector_fits_mode_p, expand_vec_perm_var): Declare. (expand_vec_perm_const): Declare. * target.def (vec_perm_const_ok): Replace with... (vec_perm_const): ...this new hook. * doc/tm.texi.in (TARGET_VECTORIZE_VEC_PERM_CONST_OK): Replace with... (TARGET_VECTORIZE_VEC_PERM_CONST): ...this new hook. * doc/tm.texi: Regenerate. * optabs.def (vec_perm_const): Delete. * doc/md.texi (vec_perm_const): Likewise. (vec_perm): Refer to TARGET_VECTORIZE_VEC_PERM_CONST. * expr.c (expand_expr_real_2): Use expand_vec_perm_const rather than expand_vec_perm for constant permutation vectors. Assert that the mode of variable permutation vectors is the integer equivalent of the mode that is being permuted. * optabs-query.h (selector_fits_mode_p): Declare. * optabs-query.c: Include vec-perm-indices.h. (selector_fits_mode_p): New function. (can_vec_perm_const_p): Check whether targetm.vectorize.vec_perm_const is defined, instead of checking whether the vec_perm_const_optab exists. Use targetm.vectorize.vec_perm_const instead of targetm.vectorize.vec_perm_const_ok. Check whether the indices fit in the vector mode before using a variable permute. * optabs.c (shift_amt_for_vec_perm_mask): Take a mode and a vec_perm_indices instead of an rtx. (expand_vec_perm): Replace with... (expand_vec_perm_const): ...this new function. Take the selector as a vec_perm_indices rather than an rtx. Also take the mode of the selector. Update call to shift_amt_for_vec_perm_mask. Use targetm.vectorize.vec_perm_const instead of vec_perm_const_optab. Use vec_perm_indices::new_expanded_vector to expand the original selector into bytes. Check whether the indices fit in the vector mode before using a variable permute. (expand_vec_perm_var): Make global. (expand_mult_highpart): Use expand_vec_perm_const. * fold-const.c: Include vec-perm-indices.h. * tree-ssa-forwprop.c: Likewise. * tree-vect-data-refs.c: Likewise. * tree-vect-generic.c: Likewise. * tree-vect-loop.c: Likewise. * tree-vect-slp.c: Likewise.
* tree-vect-stmts.c: Likewise. * config/aarch64/aarch64-protos.h (aarch64_expand_vec_perm_const): Delete. * config/aarch64/aarch64-simd.md (vec_perm_const<mode>): Delete. * config/aarch64/aarch64.c (aarch64_expand_vec_perm_const) (aarch64_vectorize_vec_perm_const_ok): Fuse into... (aarch64_vectorize_vec_perm_const): ...this new function. (TARGET_VECTORIZE_VEC_PERM_CONST_OK): Delete. (TARGET_VECTORIZE_VEC_PERM_CONST): Redefine. * config/arm/arm-protos.h (arm_expand_vec_perm_const): Delete. * config/arm/vec-common.md (vec_perm_const<mode>): Delete. * config/arm/arm.c (TARGET_VECTORIZE_VEC_PERM_CONST_OK): Delete. (TARGET_VECTORIZE_VEC_PERM_CONST): Redefine. (arm_expand_vec_perm_const, arm_vectorize_vec_perm_const_ok): Merge into... (arm_vectorize_vec_perm_const): ...this new function. Explicitly check for NEON modes. * config/i386/i386-protos.h (ix86_expand_vec_perm_const): Delete. * config/i386/sse.md (VEC_PERM_CONST, vec_perm_const<mode>): Delete. * config/i386/i386.c (ix86_expand_vec_perm_const_1): Update comment. (ix86_expand_vec_perm_const, ix86_vectorize_vec_perm_const_ok): Merge into... (ix86_vectorize_vec_perm_const): ...this new function. Incorporate the old VEC_PERM_CONST conditions. * config/ia64/ia64-protos.h (ia64_expand_vec_perm_const): Delete. * config/ia64/vect.md (vec_perm_const<mode>): Delete. * config/ia64/ia64.c (ia64_expand_vec_perm_const) (ia64_vectorize_vec_perm_const_ok): Merge into... (ia64_vectorize_vec_perm_const): ...this new function. * config/mips/loongson.md (vec_perm_const<mode>): Delete. * config/mips/mips-msa.md (vec_perm_const<mode>): Delete. * config/mips/mips-ps-3d.md (vec_perm_constv2sf): Delete. * config/mips/mips-protos.h (mips_expand_vec_perm_const): Delete. * config/mips/mips.c (mips_expand_vec_perm_const) (mips_vectorize_vec_perm_const_ok): Merge into... (mips_vectorize_vec_perm_const): ...this new function. * config/powerpcspe/altivec.md (vec_perm_constv16qi): Delete. 
* config/powerpcspe/paired.md (vec_perm_constv2sf): Delete. * config/powerpcspe/spe.md (vec_perm_constv2si): Delete. * config/powerpcspe/vsx.md (vec_perm_const<mode>): Delete. * config/powerpcspe/powerpcspe-protos.h (altivec_expand_vec_perm_const) (rs6000_expand_vec_perm_const): Delete. * config/powerpcspe/powerpcspe.c (TARGET_VECTORIZE_VEC_PERM_CONST_OK): Delete. (TARGET_VECTORIZE_VEC_PERM_CONST): Redefine. (altivec_expand_vec_perm_const_le): Take each operand individually. Operate on constant selectors rather than rtxes. (altivec_expand_vec_perm_const): Likewise. Update call to altivec_expand_vec_perm_const_le. (rs6000_expand_vec_perm_const): Delete. (rs6000_vectorize_vec_perm_const_ok): Delete. (rs6000_vectorize_vec_perm_const): New function. (rs6000_do_expand_vec_perm): Take a vec_perm_builder instead of an element count and rtx array. (rs6000_expand_extract_even): Update call accordingly. (rs6000_expand_interleave): Likewise. * config/rs6000/altivec.md (vec_perm_constv16qi): Delete. * config/rs6000/paired.md (vec_perm_constv2sf): Delete. * config/rs6000/vsx.md (vec_perm_const<mode>): Delete. * config/rs6000/rs6000-protos.h (altivec_expand_vec_perm_const) (rs6000_expand_vec_perm_const): Delete. * config/rs6000/rs6000.c (TARGET_VECTORIZE_VEC_PERM_CONST_OK): Delete. (TARGET_VECTORIZE_VEC_PERM_CONST): Redefine. (altivec_expand_vec_perm_const_le): Take each operand individually. Operate on constant selectors rather than rtxes. (altivec_expand_vec_perm_const): Likewise. Update call to altivec_expand_vec_perm_const_le. (rs6000_expand_vec_perm_const): Delete. (rs6000_vectorize_vec_perm_const_ok): Delete. (rs6000_vectorize_vec_perm_const): New function. Remove stray reference to the SPE evmerge instructions. (rs6000_do_expand_vec_perm): Take a vec_perm_builder instead of an element count and rtx array. (rs6000_expand_extract_even): Update call accordingly. (rs6000_expand_interleave): Likewise. * config/sparc/sparc.md (vec_perm_constv8qi): Delete in favor of...
* config/sparc/sparc.c (sparc_vectorize_vec_perm_const): ...this new function. (TARGET_VECTORIZE_VEC_PERM_CONST): Redefine. From-SVN: r256093
Diffstat (limited to 'gcc/config/i386')
-rw-r--r--gcc/config/i386/i386-protos.h1
-rw-r--r--gcc/config/i386/i386.c181
-rw-r--r--gcc/config/i386/sse.md24
3 files changed, 90 insertions, 116 deletions
diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index f5755f0d363..287b0198589 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -133,7 +133,6 @@ extern bool ix86_expand_fp_movcc (rtx[]);
extern bool ix86_expand_fp_vcond (rtx[]);
extern bool ix86_expand_int_vcond (rtx[]);
extern void ix86_expand_vec_perm (rtx[]);
-extern bool ix86_expand_vec_perm_const (rtx[]);
extern bool ix86_expand_mask_vec_cmp (rtx[]);
extern bool ix86_expand_int_vec_cmp (rtx[]);
extern bool ix86_expand_fp_vec_cmp (rtx[]);
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 9ff9ca4e37f..1acb2c6ab83 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -47605,9 +47605,8 @@ expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d *d)
return true;
}
-/* The guts of ix86_expand_vec_perm_const, also used by the ok hook.
- With all of the interface bits taken care of, perform the expansion
- in D and return true on success. */
+/* The guts of ix86_vectorize_vec_perm_const. With all of the interface bits
+ taken care of, perform the expansion in D and return true on success. */
static bool
ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
@@ -47742,69 +47741,29 @@ canonicalize_perm (struct expand_vec_perm_d *d)
return (which == 3);
}
-bool
-ix86_expand_vec_perm_const (rtx operands[4])
+/* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
+
+static bool
+ix86_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0,
+ rtx op1, const vec_perm_indices &sel)
{
struct expand_vec_perm_d d;
unsigned char perm[MAX_VECT_LEN];
- int i, nelt;
+ unsigned int i, nelt, which;
bool two_args;
- rtx sel;
- d.target = operands[0];
- d.op0 = operands[1];
- d.op1 = operands[2];
- sel = operands[3];
+ d.target = target;
+ d.op0 = op0;
+ d.op1 = op1;
- d.vmode = GET_MODE (d.target);
+ d.vmode = vmode;
gcc_assert (VECTOR_MODE_P (d.vmode));
d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
- d.testing_p = false;
+ d.testing_p = !target;
- gcc_assert (GET_CODE (sel) == CONST_VECTOR);
- gcc_assert (XVECLEN (sel, 0) == nelt);
+ gcc_assert (sel.length () == nelt);
gcc_checking_assert (sizeof (d.perm) == sizeof (perm));
- for (i = 0; i < nelt; ++i)
- {
- rtx e = XVECEXP (sel, 0, i);
- int ei = INTVAL (e) & (2 * nelt - 1);
- d.perm[i] = ei;
- perm[i] = ei;
- }
-
- two_args = canonicalize_perm (&d);
-
- if (ix86_expand_vec_perm_const_1 (&d))
- return true;
-
- /* If the selector says both arguments are needed, but the operands are the
- same, the above tried to expand with one_operand_p and flattened selector.
- If that didn't work, retry without one_operand_p; we succeeded with that
- during testing. */
- if (two_args && d.one_operand_p)
- {
- d.one_operand_p = false;
- memcpy (d.perm, perm, sizeof (perm));
- return ix86_expand_vec_perm_const_1 (&d);
- }
-
- return false;
-}
-
-/* Implement targetm.vectorize.vec_perm_const_ok. */
-
-static bool
-ix86_vectorize_vec_perm_const_ok (machine_mode vmode, vec_perm_indices sel)
-{
- struct expand_vec_perm_d d;
- unsigned int i, nelt, which;
- bool ret;
-
- d.vmode = vmode;
- d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
- d.testing_p = true;
-
/* Given sufficient ISA support we can just return true here
for selected vector modes. */
switch (d.vmode)
@@ -47813,17 +47772,23 @@ ix86_vectorize_vec_perm_const_ok (machine_mode vmode, vec_perm_indices sel)
case E_V16SImode:
case E_V8DImode:
case E_V8DFmode:
- if (TARGET_AVX512F)
- /* All implementable with a single vperm[it]2 insn. */
+ if (!TARGET_AVX512F)
+ return false;
+ /* All implementable with a single vperm[it]2 insn. */
+ if (d.testing_p)
return true;
break;
case E_V32HImode:
- if (TARGET_AVX512BW)
+ if (!TARGET_AVX512BW)
+ return false;
+ if (d.testing_p)
/* All implementable with a single vperm[it]2 insn. */
return true;
break;
case E_V64QImode:
- if (TARGET_AVX512BW)
+ if (!TARGET_AVX512BW)
+ return false;
+ if (d.testing_p)
/* Implementable with 2 vperm[it]2, 2 vpshufb and 1 or insn. */
return true;
break;
@@ -47831,73 +47796,108 @@ ix86_vectorize_vec_perm_const_ok (machine_mode vmode, vec_perm_indices sel)
case E_V8SFmode:
case E_V4DFmode:
case E_V4DImode:
- if (TARGET_AVX512VL)
+ if (!TARGET_AVX)
+ return false;
+ if (d.testing_p && TARGET_AVX512VL)
/* All implementable with a single vperm[it]2 insn. */
return true;
break;
case E_V16HImode:
- if (TARGET_AVX2)
+ if (!TARGET_SSE2)
+ return false;
+ if (d.testing_p && TARGET_AVX2)
/* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns. */
return true;
break;
case E_V32QImode:
- if (TARGET_AVX2)
+ if (!TARGET_SSE2)
+ return false;
+ if (d.testing_p && TARGET_AVX2)
/* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns. */
return true;
break;
- case E_V4SImode:
- case E_V4SFmode:
case E_V8HImode:
case E_V16QImode:
+ if (!TARGET_SSE2)
+ return false;
+ /* Fall through. */
+ case E_V4SImode:
+ case E_V4SFmode:
+ if (!TARGET_SSE)
+ return false;
/* All implementable with a single vpperm insn. */
- if (TARGET_XOP)
+ if (d.testing_p && TARGET_XOP)
return true;
/* All implementable with 2 pshufb + 1 ior. */
- if (TARGET_SSSE3)
+ if (d.testing_p && TARGET_SSSE3)
return true;
break;
case E_V2DImode:
case E_V2DFmode:
+ if (!TARGET_SSE)
+ return false;
/* All implementable with shufpd or unpck[lh]pd. */
- return true;
+ if (d.testing_p)
+ return true;
+ break;
default:
return false;
}
- /* Extract the values from the vector CST into the permutation
- array in D. */
for (i = which = 0; i < nelt; ++i)
{
unsigned char e = sel[i];
gcc_assert (e < 2 * nelt);
d.perm[i] = e;
+ perm[i] = e;
which |= (e < nelt ? 1 : 2);
}
- /* For all elements from second vector, fold the elements to first. */
- if (which == 2)
- for (i = 0; i < nelt; ++i)
- d.perm[i] -= nelt;
+ if (d.testing_p)
+ {
+ /* For all elements from second vector, fold the elements to first. */
+ if (which == 2)
+ for (i = 0; i < nelt; ++i)
+ d.perm[i] -= nelt;
- /* Check whether the mask can be applied to the vector type. */
- d.one_operand_p = (which != 3);
+ /* Check whether the mask can be applied to the vector type. */
+ d.one_operand_p = (which != 3);
- /* Implementable with shufps or pshufd. */
- if (d.one_operand_p && (d.vmode == V4SFmode || d.vmode == V4SImode))
- return true;
+ /* Implementable with shufps or pshufd. */
+ if (d.one_operand_p && (d.vmode == V4SFmode || d.vmode == V4SImode))
+ return true;
- /* Otherwise we have to go through the motions and see if we can
- figure out how to generate the requested permutation. */
- d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
- d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
- if (!d.one_operand_p)
- d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
+ /* Otherwise we have to go through the motions and see if we can
+ figure out how to generate the requested permutation. */
+ d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
+ d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
+ if (!d.one_operand_p)
+ d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
- start_sequence ();
- ret = ix86_expand_vec_perm_const_1 (&d);
- end_sequence ();
+ start_sequence ();
+ bool ret = ix86_expand_vec_perm_const_1 (&d);
+ end_sequence ();
- return ret;
+ return ret;
+ }
+
+ two_args = canonicalize_perm (&d);
+
+ if (ix86_expand_vec_perm_const_1 (&d))
+ return true;
+
+ /* If the selector says both arguments are needed, but the operands are the
+ same, the above tried to expand with one_operand_p and flattened selector.
+ If that didn't work, retry without one_operand_p; we succeeded with that
+ during testing. */
+ if (two_args && d.one_operand_p)
+ {
+ d.one_operand_p = false;
+ memcpy (d.perm, perm, sizeof (perm));
+ return ix86_expand_vec_perm_const_1 (&d);
+ }
+
+ return false;
}
void
@@ -50549,9 +50549,8 @@ ix86_run_selftests (void)
#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
ix86_builtin_vectorization_cost
-#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
-#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
- ix86_vectorize_vec_perm_const_ok
+#undef TARGET_VECTORIZE_VEC_PERM_CONST
+#define TARGET_VECTORIZE_VEC_PERM_CONST ix86_vectorize_vec_perm_const
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
ix86_preferred_simd_mode
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 105b5cf6092..76c150fe8ec 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -11498,30 +11498,6 @@
DONE;
})
-(define_mode_iterator VEC_PERM_CONST
- [(V4SF "TARGET_SSE") (V4SI "TARGET_SSE")
- (V2DF "TARGET_SSE") (V2DI "TARGET_SSE")
- (V16QI "TARGET_SSE2") (V8HI "TARGET_SSE2")
- (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
- (V8SI "TARGET_AVX") (V4DI "TARGET_AVX")
- (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
- (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
- (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")
- (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512BW")])
-
-(define_expand "vec_perm_const<mode>"
- [(match_operand:VEC_PERM_CONST 0 "register_operand")
- (match_operand:VEC_PERM_CONST 1 "register_operand")
- (match_operand:VEC_PERM_CONST 2 "register_operand")
- (match_operand:<sseintvecmode> 3)]
- ""
-{
- if (ix86_expand_vec_perm_const (operands))
- DONE;
- else
- FAIL;
-})
-
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel bitwise logical operations