summaryrefslogtreecommitdiff
path: root/gcc/config
diff options
context:
space:
mode:
authorjgreenhalgh <jgreenhalgh@138bc75d-0d04-0410-961f-82ee72b054a4>2012-12-05 11:42:37 +0000
committerjgreenhalgh <jgreenhalgh@138bc75d-0d04-0410-961f-82ee72b054a4>2012-12-05 11:42:37 +0000
commit14799b23a661479a4f3d2ec15471afb3618af00d (patch)
tree8d38b0ce781601cf444ebe58a45a1bb2843aac2a /gcc/config
parent5de1fcdb13d0a384208d9a8975a177b243f71aaa (diff)
downloadgcc-14799b23a661479a4f3d2ec15471afb3618af00d.tar.gz
[AARCH64] Add zip{1, 2}, uzp{1, 2}, trn{1, 2} support
for vector permute. gcc/ * config/aarch64/aarch64-simd-builtins.def: Add new builtins. * config/aarch64/aarch64-simd.md (simd_type): Add uzp. (aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>): New. * config/aarch64/aarch64.c (aarch64_evpc_trn): New. (aarch64_evpc_uzp): Likewise. (aarch64_evpc_zip): Likewise. (aarch64_expand_vec_perm_const_1): Check for trn, zip, uzp patterns. * config/aarch64/iterators.md (unspec): Add necessary unspecs. (PERMUTE): New. (perm_insn): Likewise. (perm_hilo): Likewise. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@194219 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc/config')
-rw-r--r--gcc/config/aarch64/aarch64-simd-builtins.def9
-rw-r--r--gcc/config/aarch64/aarch64-simd.md15
-rw-r--r--gcc/config/aarch64/aarch64.c265
-rw-r--r--gcc/config/aarch64/iterators.md16
4 files changed, 303 insertions, 2 deletions
diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index b344120ae90..d441417b00f 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -224,3 +224,12 @@
BUILTIN_VDQF (UNOP, fcvtpu)
BUILTIN_VDQF (UNOP, fcvtms)
BUILTIN_VDQF (UNOP, fcvtmu)
+
+ /* Implemented by
+ aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>. */
+ BUILTIN_VALL (BINOP, zip1)
+ BUILTIN_VALL (BINOP, zip2)
+ BUILTIN_VALL (BINOP, uzp1)
+ BUILTIN_VALL (BINOP, uzp2)
+ BUILTIN_VALL (BINOP, trn1)
+ BUILTIN_VALL (BINOP, trn2)
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index baee0cc8c59..febf71d37c4 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -128,7 +128,8 @@
; simd_store4s store single structure from one lane for four registers (ST4 [index]).
; simd_tbl table lookup.
; simd_trn transpose.
-; simd_zip zip/unzip.
+; simd_uzp unzip.
+; simd_zip zip.
(define_attr "simd_type"
"simd_abd,\
@@ -230,6 +231,7 @@
simd_store4s,\
simd_tbl,\
simd_trn,\
+ simd_uzp,\
simd_zip,\
none"
(const_string "none"))
@@ -3406,6 +3408,17 @@
DONE;
})
+(define_insn "aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>"
+ [(set (match_operand:VALL 0 "register_operand" "=w")
+ (unspec:VALL [(match_operand:VALL 1 "register_operand" "w")
+ (match_operand:VALL 2 "register_operand" "w")]
+ PERMUTE))]
+ "TARGET_SIMD"
+ "<PERMUTE:perm_insn><PERMUTE:perm_hilo>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
+ [(set_attr "simd_type" "simd_<PERMUTE:perm_insn>")
+ (set_attr "simd_mode" "<MODE>")]
+)
+
(define_insn "aarch64_st2<mode>_dreg"
[(set (match_operand:TI 0 "aarch64_simd_struct_operand" "=Utv")
(unspec:TI [(match_operand:OI 1 "register_operand" "w")
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index ae1a037cf78..7bc2f6b896a 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -6919,6 +6919,261 @@ aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
aarch64_expand_vec_perm_1 (target, op0, op1, sel);
}
+/* Recognize patterns suitable for the TRN instructions. */
+static bool
+aarch64_evpc_trn (struct expand_vec_perm_d *d)
+{
+ unsigned int i, odd, mask, nelt = d->nelt;
+ rtx out, in0, in1, x;
+ rtx (*gen) (rtx, rtx, rtx);
+ enum machine_mode vmode = d->vmode;
+
+ if (GET_MODE_UNIT_SIZE (vmode) > 8)
+ return false;
+
+ /* Note that these are little-endian tests.
+ We correct for big-endian later. */
+ if (d->perm[0] == 0)
+ odd = 0;
+ else if (d->perm[0] == 1)
+ odd = 1;
+ else
+ return false;
+ mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
+
+ for (i = 0; i < nelt; i += 2)
+ {
+ if (d->perm[i] != i + odd)
+ return false;
+ if (d->perm[i + 1] != ((i + nelt + odd) & mask))
+ return false;
+ }
+
+ /* Success! */
+ if (d->testing_p)
+ return true;
+
+ in0 = d->op0;
+ in1 = d->op1;
+ if (BYTES_BIG_ENDIAN)
+ {
+ x = in0, in0 = in1, in1 = x;
+ odd = !odd;
+ }
+ out = d->target;
+
+ if (odd)
+ {
+ switch (vmode)
+ {
+ case V16QImode: gen = gen_aarch64_trn2v16qi; break;
+ case V8QImode: gen = gen_aarch64_trn2v8qi; break;
+ case V8HImode: gen = gen_aarch64_trn2v8hi; break;
+ case V4HImode: gen = gen_aarch64_trn2v4hi; break;
+ case V4SImode: gen = gen_aarch64_trn2v4si; break;
+ case V2SImode: gen = gen_aarch64_trn2v2si; break;
+ case V2DImode: gen = gen_aarch64_trn2v2di; break;
+ case V4SFmode: gen = gen_aarch64_trn2v4sf; break;
+ case V2SFmode: gen = gen_aarch64_trn2v2sf; break;
+ case V2DFmode: gen = gen_aarch64_trn2v2df; break;
+ default:
+ return false;
+ }
+ }
+ else
+ {
+ switch (vmode)
+ {
+ case V16QImode: gen = gen_aarch64_trn1v16qi; break;
+ case V8QImode: gen = gen_aarch64_trn1v8qi; break;
+ case V8HImode: gen = gen_aarch64_trn1v8hi; break;
+ case V4HImode: gen = gen_aarch64_trn1v4hi; break;
+ case V4SImode: gen = gen_aarch64_trn1v4si; break;
+ case V2SImode: gen = gen_aarch64_trn1v2si; break;
+ case V2DImode: gen = gen_aarch64_trn1v2di; break;
+ case V4SFmode: gen = gen_aarch64_trn1v4sf; break;
+ case V2SFmode: gen = gen_aarch64_trn1v2sf; break;
+ case V2DFmode: gen = gen_aarch64_trn1v2df; break;
+ default:
+ return false;
+ }
+ }
+
+ emit_insn (gen (out, in0, in1));
+ return true;
+}
+
+/* Recognize patterns suitable for the UZP instructions. */
+static bool
+aarch64_evpc_uzp (struct expand_vec_perm_d *d)
+{
+ unsigned int i, odd, mask, nelt = d->nelt;
+ rtx out, in0, in1, x;
+ rtx (*gen) (rtx, rtx, rtx);
+ enum machine_mode vmode = d->vmode;
+
+ if (GET_MODE_UNIT_SIZE (vmode) > 8)
+ return false;
+
+ /* Note that these are little-endian tests.
+ We correct for big-endian later. */
+ if (d->perm[0] == 0)
+ odd = 0;
+ else if (d->perm[0] == 1)
+ odd = 1;
+ else
+ return false;
+ mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
+
+ for (i = 0; i < nelt; i++)
+ {
+ unsigned elt = (i * 2 + odd) & mask;
+ if (d->perm[i] != elt)
+ return false;
+ }
+
+ /* Success! */
+ if (d->testing_p)
+ return true;
+
+ in0 = d->op0;
+ in1 = d->op1;
+ if (BYTES_BIG_ENDIAN)
+ {
+ x = in0, in0 = in1, in1 = x;
+ odd = !odd;
+ }
+ out = d->target;
+
+ if (odd)
+ {
+ switch (vmode)
+ {
+ case V16QImode: gen = gen_aarch64_uzp2v16qi; break;
+ case V8QImode: gen = gen_aarch64_uzp2v8qi; break;
+ case V8HImode: gen = gen_aarch64_uzp2v8hi; break;
+ case V4HImode: gen = gen_aarch64_uzp2v4hi; break;
+ case V4SImode: gen = gen_aarch64_uzp2v4si; break;
+ case V2SImode: gen = gen_aarch64_uzp2v2si; break;
+ case V2DImode: gen = gen_aarch64_uzp2v2di; break;
+ case V4SFmode: gen = gen_aarch64_uzp2v4sf; break;
+ case V2SFmode: gen = gen_aarch64_uzp2v2sf; break;
+ case V2DFmode: gen = gen_aarch64_uzp2v2df; break;
+ default:
+ return false;
+ }
+ }
+ else
+ {
+ switch (vmode)
+ {
+ case V16QImode: gen = gen_aarch64_uzp1v16qi; break;
+ case V8QImode: gen = gen_aarch64_uzp1v8qi; break;
+ case V8HImode: gen = gen_aarch64_uzp1v8hi; break;
+ case V4HImode: gen = gen_aarch64_uzp1v4hi; break;
+ case V4SImode: gen = gen_aarch64_uzp1v4si; break;
+ case V2SImode: gen = gen_aarch64_uzp1v2si; break;
+ case V2DImode: gen = gen_aarch64_uzp1v2di; break;
+ case V4SFmode: gen = gen_aarch64_uzp1v4sf; break;
+ case V2SFmode: gen = gen_aarch64_uzp1v2sf; break;
+ case V2DFmode: gen = gen_aarch64_uzp1v2df; break;
+ default:
+ return false;
+ }
+ }
+
+ emit_insn (gen (out, in0, in1));
+ return true;
+}
+
+/* Recognize patterns suitable for the ZIP instructions. */
+static bool
+aarch64_evpc_zip (struct expand_vec_perm_d *d)
+{
+ unsigned int i, high, mask, nelt = d->nelt;
+ rtx out, in0, in1, x;
+ rtx (*gen) (rtx, rtx, rtx);
+ enum machine_mode vmode = d->vmode;
+
+ if (GET_MODE_UNIT_SIZE (vmode) > 8)
+ return false;
+
+ /* Note that these are little-endian tests.
+ We correct for big-endian later. */
+ high = nelt / 2;
+ if (d->perm[0] == high)
+ /* Do Nothing. */
+ ;
+ else if (d->perm[0] == 0)
+ high = 0;
+ else
+ return false;
+ mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
+
+ for (i = 0; i < nelt / 2; i++)
+ {
+ unsigned elt = (i + high) & mask;
+ if (d->perm[i * 2] != elt)
+ return false;
+ elt = (elt + nelt) & mask;
+ if (d->perm[i * 2 + 1] != elt)
+ return false;
+ }
+
+ /* Success! */
+ if (d->testing_p)
+ return true;
+
+ in0 = d->op0;
+ in1 = d->op1;
+ if (BYTES_BIG_ENDIAN)
+ {
+ x = in0, in0 = in1, in1 = x;
+ high = !high;
+ }
+ out = d->target;
+
+ if (high)
+ {
+ switch (vmode)
+ {
+ case V16QImode: gen = gen_aarch64_zip2v16qi; break;
+ case V8QImode: gen = gen_aarch64_zip2v8qi; break;
+ case V8HImode: gen = gen_aarch64_zip2v8hi; break;
+ case V4HImode: gen = gen_aarch64_zip2v4hi; break;
+ case V4SImode: gen = gen_aarch64_zip2v4si; break;
+ case V2SImode: gen = gen_aarch64_zip2v2si; break;
+ case V2DImode: gen = gen_aarch64_zip2v2di; break;
+ case V4SFmode: gen = gen_aarch64_zip2v4sf; break;
+ case V2SFmode: gen = gen_aarch64_zip2v2sf; break;
+ case V2DFmode: gen = gen_aarch64_zip2v2df; break;
+ default:
+ return false;
+ }
+ }
+ else
+ {
+ switch (vmode)
+ {
+ case V16QImode: gen = gen_aarch64_zip1v16qi; break;
+ case V8QImode: gen = gen_aarch64_zip1v8qi; break;
+ case V8HImode: gen = gen_aarch64_zip1v8hi; break;
+ case V4HImode: gen = gen_aarch64_zip1v4hi; break;
+ case V4SImode: gen = gen_aarch64_zip1v4si; break;
+ case V2SImode: gen = gen_aarch64_zip1v2si; break;
+ case V2DImode: gen = gen_aarch64_zip1v2di; break;
+ case V4SFmode: gen = gen_aarch64_zip1v4sf; break;
+ case V2SFmode: gen = gen_aarch64_zip1v2sf; break;
+ case V2DFmode: gen = gen_aarch64_zip1v2df; break;
+ default:
+ return false;
+ }
+ }
+
+ emit_insn (gen (out, in0, in1));
+ return true;
+}
+
static bool
aarch64_evpc_tbl (struct expand_vec_perm_d *d)
{
@@ -6969,7 +7224,15 @@ aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
}
if (TARGET_SIMD)
- return aarch64_evpc_tbl (d);
+ {
+ if (aarch64_evpc_zip (d))
+ return true;
+ else if (aarch64_evpc_uzp (d))
+ return true;
+ else if (aarch64_evpc_trn (d))
+ return true;
+ return aarch64_evpc_tbl (d);
+ }
return false;
}
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 7cd4cef0eef..0eb30f06c04 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -230,6 +230,12 @@
UNSPEC_BSL ; Used in aarch64-simd.md.
UNSPEC_TBL ; Used in vector permute patterns.
UNSPEC_CONCAT ; Used in vector permute patterns.
+ UNSPEC_ZIP1 ; Used in vector permute patterns.
+ UNSPEC_ZIP2 ; Used in vector permute patterns.
+ UNSPEC_UZP1 ; Used in vector permute patterns.
+ UNSPEC_UZP2 ; Used in vector permute patterns.
+ UNSPEC_TRN1 ; Used in vector permute patterns.
+ UNSPEC_TRN2 ; Used in vector permute patterns.
])
;; -------------------------------------------------------------------
@@ -652,6 +658,9 @@
(define_int_iterator VCMP_U [UNSPEC_CMHS UNSPEC_CMHI UNSPEC_CMTST])
+(define_int_iterator PERMUTE [UNSPEC_ZIP1 UNSPEC_ZIP2
+ UNSPEC_TRN1 UNSPEC_TRN2
+ UNSPEC_UZP1 UNSPEC_UZP2])
(define_int_iterator FRINT [UNSPEC_FRINTZ UNSPEC_FRINTP UNSPEC_FRINTM
UNSPEC_FRINTI UNSPEC_FRINTX UNSPEC_FRINTA])
@@ -757,3 +766,10 @@
(define_int_attr fcvt_pattern [(UNSPEC_FRINTZ "btrunc") (UNSPEC_FRINTA "round")
(UNSPEC_FRINTP "ceil") (UNSPEC_FRINTM "floor")])
+(define_int_attr perm_insn [(UNSPEC_ZIP1 "zip") (UNSPEC_ZIP2 "zip")
+ (UNSPEC_TRN1 "trn") (UNSPEC_TRN2 "trn")
+ (UNSPEC_UZP1 "uzp") (UNSPEC_UZP2 "uzp")])
+
+(define_int_attr perm_hilo [(UNSPEC_ZIP1 "1") (UNSPEC_ZIP2 "2")
+ (UNSPEC_TRN1 "1") (UNSPEC_TRN2 "2")
+ (UNSPEC_UZP1 "1") (UNSPEC_UZP2 "2")])