summaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
Diffstat (limited to 'gcc')
-rw-r--r--gcc/ChangeLog8
-rw-r--r--gcc/config/i386/i386.c79
-rw-r--r--gcc/config/i386/sse.md32
3 files changed, 119 insertions, 0 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 48b703c09e7..d43a5cf3e5a 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,11 @@
+2011-10-14 Jakub Jelinek <jakub@redhat.com>
+
+ * config/i386/sse.md (vec_interleave_high<mode>,
+ vec_interleave_low<mode>): Add AVX2 expanders for VI_256
+ modes.
+ * config/i386/i386.c (expand_vec_perm_interleave3): New function.
+ (ix86_expand_vec_perm_builtin_1): Call it.
+
2011-10-14 Georg-Johann Lay <avr@gjlay.de>
Fix thinko from r179765
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index df6267b664c..f09a37277fa 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -35474,6 +35474,82 @@ expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
return true;
}
+/* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
+ a two vector permutation using 2 intra-lane interleave insns
+ and cross-lane shuffle for 32-byte vectors. */
+
+static bool
+expand_vec_perm_interleave3 (struct expand_vec_perm_d *d)
+{
+ unsigned i, nelt;
+ rtx (*gen) (rtx, rtx, rtx);
+
+ if (d->op0 == d->op1)
+ return false;
+ if (TARGET_AVX2 && GET_MODE_SIZE (d->vmode) == 32)
+ ;
+ else if (TARGET_AVX && (d->vmode == V8SFmode || d->vmode == V4DFmode))
+ ;
+ else
+ return false;
+
+ nelt = d->nelt;
+ if (d->perm[0] != 0 && d->perm[0] != nelt / 2)
+ return false;
+ for (i = 0; i < nelt; i += 2)
+ if (d->perm[i] != d->perm[0] + i / 2
+ || d->perm[i + 1] != d->perm[0] + i / 2 + nelt)
+ return false;
+
+ if (d->testing_p)
+ return true;
+
+ switch (d->vmode)
+ {
+ case V32QImode:
+ if (d->perm[0])
+ gen = gen_vec_interleave_highv32qi;
+ else
+ gen = gen_vec_interleave_lowv32qi;
+ break;
+ case V16HImode:
+ if (d->perm[0])
+ gen = gen_vec_interleave_highv16hi;
+ else
+ gen = gen_vec_interleave_lowv16hi;
+ break;
+ case V8SImode:
+ if (d->perm[0])
+ gen = gen_vec_interleave_highv8si;
+ else
+ gen = gen_vec_interleave_lowv8si;
+ break;
+ case V4DImode:
+ if (d->perm[0])
+ gen = gen_vec_interleave_highv4di;
+ else
+ gen = gen_vec_interleave_lowv4di;
+ break;
+ case V8SFmode:
+ if (d->perm[0])
+ gen = gen_vec_interleave_highv8sf;
+ else
+ gen = gen_vec_interleave_lowv8sf;
+ break;
+ case V4DFmode:
+ if (d->perm[0])
+ gen = gen_vec_interleave_highv4df;
+ else
+ gen = gen_vec_interleave_lowv4df;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ emit_insn (gen (d->target, d->op0, d->op1));
+ return true;
+}
+
/* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
permutation with two pshufb insns and an ior. We should have already
failed all two instruction sequences. */
@@ -35972,6 +36048,9 @@ ix86_expand_vec_perm_builtin_1 (struct expand_vec_perm_d *d)
if (expand_vec_perm_pshufb2 (d))
return true;
+ if (expand_vec_perm_interleave3 (d))
+ return true;
+
/* Try sequences of four instructions. */
if (expand_vec_perm_vpshufb2_vpermq (d))
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 8b07f9a877a..016eae2d371 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -6848,6 +6848,38 @@
(set_attr "prefix" "orig,vex")
(set_attr "mode" "TI")])
+(define_expand "vec_interleave_high<mode>"
+ [(match_operand:VI_256 0 "register_operand" "=x")
+ (match_operand:VI_256 1 "register_operand" "x")
+ (match_operand:VI_256 2 "nonimmediate_operand" "xm")]
+ "TARGET_AVX2"
+{
+ rtx t1 = gen_reg_rtx (<MODE>mode);
+ rtx t2 = gen_reg_rtx (<MODE>mode);
+ emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2]));
+ emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2]));
+ emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode, operands[0]),
+ gen_lowpart (V4DImode, t1),
+ gen_lowpart (V4DImode, t2), GEN_INT (1 + (3 << 4))));
+ DONE;
+})
+
+(define_expand "vec_interleave_low<mode>"
+ [(match_operand:VI_256 0 "register_operand" "=x")
+ (match_operand:VI_256 1 "register_operand" "x")
+ (match_operand:VI_256 2 "nonimmediate_operand" "xm")]
+ "TARGET_AVX2"
+{
+ rtx t1 = gen_reg_rtx (<MODE>mode);
+ rtx t2 = gen_reg_rtx (<MODE>mode);
+ emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2]));
+ emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2]));
+ emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode, operands[0]),
+ gen_lowpart (V4DImode, t1),
+ gen_lowpart (V4DImode, t2), GEN_INT (0 + (2 << 4))));
+ DONE;
+})
+
;; Modes handled by pinsr patterns.
(define_mode_iterator PINSR_MODE
[(V16QI "TARGET_SSE4_1") V8HI