diff options
Diffstat (limited to 'gcc/config')
-rw-r--r-- | gcc/config/aarch64/aarch64-simd-builtins.def | 9 | ||||
-rw-r--r-- | gcc/config/aarch64/aarch64-simd.md | 15 | ||||
-rw-r--r-- | gcc/config/aarch64/aarch64.c | 265 | ||||
-rw-r--r-- | gcc/config/aarch64/iterators.md | 16 |
4 files changed, 303 insertions, 2 deletions
diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def index b344120ae90..d441417b00f 100644 --- a/gcc/config/aarch64/aarch64-simd-builtins.def +++ b/gcc/config/aarch64/aarch64-simd-builtins.def @@ -224,3 +224,12 @@ BUILTIN_VDQF (UNOP, fcvtpu) BUILTIN_VDQF (UNOP, fcvtms) BUILTIN_VDQF (UNOP, fcvtmu) + + /* Implemented by + aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>. Each entry below is a BINOP builtin declared through BUILTIN_VALL; the six names are the zip, uzp and trn instructions in their 1 and 2 forms. */ + BUILTIN_VALL (BINOP, zip1) + BUILTIN_VALL (BINOP, zip2) + BUILTIN_VALL (BINOP, uzp1) + BUILTIN_VALL (BINOP, uzp2) + BUILTIN_VALL (BINOP, trn1) + BUILTIN_VALL (BINOP, trn2) diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index baee0cc8c59..febf71d37c4 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -128,7 +128,8 @@ ; simd_store4s store single structure from one lane for four registers (ST4 [index]). ; simd_tbl table lookup. ; simd_trn transpose. -; simd_zip zip/unzip. +; simd_uzp unzip. +; simd_zip zip. 
(define_attr "simd_type" "simd_abd,\ @@ -230,6 +231,7 @@ simd_store4s,\ simd_tbl,\ simd_trn,\ + simd_uzp,\ simd_zip,\ none" (const_string "none")) @@ -3406,6 +3408,17 @@ DONE; }) +(define_insn "aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>" + [(set (match_operand:VALL 0 "register_operand" "=w") + (unspec:VALL [(match_operand:VALL 1 "register_operand" "w") + (match_operand:VALL 2 "register_operand" "w")] + PERMUTE))] + "TARGET_SIMD" + "<PERMUTE:perm_insn><PERMUTE:perm_hilo>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" + [(set_attr "simd_type" "simd_<PERMUTE:perm_insn>") + (set_attr "simd_mode" "<MODE>")] +) + (define_insn "aarch64_st2<mode>_dreg" [(set (match_operand:TI 0 "aarch64_simd_struct_operand" "=Utv") (unspec:TI [(match_operand:OI 1 "register_operand" "w") diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index ae1a037cf78..7bc2f6b896a 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -6919,6 +6919,261 @@ aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel) aarch64_expand_vec_perm_1 (target, op0, op1, sel); } +/* Recognize patterns suitable for the TRN instructions. D->perm[0] must be 0 or 1 (kept in odd); every even position I must hold I + odd and position I + 1 must hold I + nelt + odd, masked with nelt - 1 when D->one_vector_p is set and with 2 * nelt - 1 otherwise. Return false when the selector does not match, when the element size is above 8 bytes, or when the vector mode has no generator in the switch below. Return true on a match; unless D->testing_p is set, a trn1 or trn2 insn (chosen by odd after the big-endian correction) is also emitted into D->target. */ +static bool +aarch64_evpc_trn (struct expand_vec_perm_d *d) +{ + unsigned int i, odd, mask, nelt = d->nelt; + rtx out, in0, in1, x; + rtx (*gen) (rtx, rtx, rtx); + enum machine_mode vmode = d->vmode; + + if (GET_MODE_UNIT_SIZE (vmode) > 8) + return false; + + /* Note that these are little-endian tests. + We correct for big-endian later. */ + if (d->perm[0] == 0) + odd = 0; + else if (d->perm[0] == 1) + odd = 1; + else + return false; + mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1); + + for (i = 0; i < nelt; i += 2) + { + if (d->perm[i] != i + odd) + return false; + if (d->perm[i + 1] != ((i + nelt + odd) & mask)) + return false; + } + + /* Success! 
*/ + if (d->testing_p) + return true; + + in0 = d->op0; + in1 = d->op1; + if (BYTES_BIG_ENDIAN) + { /* Big-endian: swap the two inputs and invert the odd/even choice. */ + x = in0, in0 = in1, in1 = x; + odd = !odd; + } + out = d->target; + + if (odd) + { + switch (vmode) + { + case V16QImode: gen = gen_aarch64_trn2v16qi; break; + case V8QImode: gen = gen_aarch64_trn2v8qi; break; + case V8HImode: gen = gen_aarch64_trn2v8hi; break; + case V4HImode: gen = gen_aarch64_trn2v4hi; break; + case V4SImode: gen = gen_aarch64_trn2v4si; break; + case V2SImode: gen = gen_aarch64_trn2v2si; break; + case V2DImode: gen = gen_aarch64_trn2v2di; break; + case V4SFmode: gen = gen_aarch64_trn2v4sf; break; + case V2SFmode: gen = gen_aarch64_trn2v2sf; break; + case V2DFmode: gen = gen_aarch64_trn2v2df; break; + default: + return false; + } + } + else + { + switch (vmode) + { + case V16QImode: gen = gen_aarch64_trn1v16qi; break; + case V8QImode: gen = gen_aarch64_trn1v8qi; break; + case V8HImode: gen = gen_aarch64_trn1v8hi; break; + case V4HImode: gen = gen_aarch64_trn1v4hi; break; + case V4SImode: gen = gen_aarch64_trn1v4si; break; + case V2SImode: gen = gen_aarch64_trn1v2si; break; + case V2DImode: gen = gen_aarch64_trn1v2di; break; + case V4SFmode: gen = gen_aarch64_trn1v4sf; break; + case V2SFmode: gen = gen_aarch64_trn1v2sf; break; + case V2DFmode: gen = gen_aarch64_trn1v2df; break; + default: + return false; + } + } + + emit_insn (gen (out, in0, in1)); + return true; +} + +/* Recognize patterns suitable for the UZP instructions. D->perm[0] must be 0 or 1 (kept in odd) and every position I must hold 2 * I + odd, masked with nelt - 1 when D->one_vector_p is set and with 2 * nelt - 1 otherwise. Return false when the selector does not match, when the element size is above 8 bytes, or when the vector mode has no generator in the switch below. Return true on a match; unless D->testing_p is set, a uzp1 or uzp2 insn (chosen by odd after the big-endian correction) is also emitted into D->target. */ +static bool +aarch64_evpc_uzp (struct expand_vec_perm_d *d) +{ + unsigned int i, odd, mask, nelt = d->nelt; + rtx out, in0, in1, x; + rtx (*gen) (rtx, rtx, rtx); + enum machine_mode vmode = d->vmode; + + if (GET_MODE_UNIT_SIZE (vmode) > 8) + return false; + + /* Note that these are little-endian tests. + We correct for big-endian later. */ + if (d->perm[0] == 0) + odd = 0; + else if (d->perm[0] == 1) + odd = 1; + else + return false; + mask = (d->one_vector_p ? 
nelt - 1 : 2 * nelt - 1); + + for (i = 0; i < nelt; i++) + { + unsigned elt = (i * 2 + odd) & mask; + if (d->perm[i] != elt) + return false; + } + + /* Success! */ + if (d->testing_p) + return true; + + in0 = d->op0; + in1 = d->op1; + if (BYTES_BIG_ENDIAN) + { /* Big-endian: swap the two inputs and invert the odd/even choice. */ + x = in0, in0 = in1, in1 = x; + odd = !odd; + } + out = d->target; + + if (odd) + { + switch (vmode) + { + case V16QImode: gen = gen_aarch64_uzp2v16qi; break; + case V8QImode: gen = gen_aarch64_uzp2v8qi; break; + case V8HImode: gen = gen_aarch64_uzp2v8hi; break; + case V4HImode: gen = gen_aarch64_uzp2v4hi; break; + case V4SImode: gen = gen_aarch64_uzp2v4si; break; + case V2SImode: gen = gen_aarch64_uzp2v2si; break; + case V2DImode: gen = gen_aarch64_uzp2v2di; break; + case V4SFmode: gen = gen_aarch64_uzp2v4sf; break; + case V2SFmode: gen = gen_aarch64_uzp2v2sf; break; + case V2DFmode: gen = gen_aarch64_uzp2v2df; break; + default: + return false; + } + } + else + { + switch (vmode) + { + case V16QImode: gen = gen_aarch64_uzp1v16qi; break; + case V8QImode: gen = gen_aarch64_uzp1v8qi; break; + case V8HImode: gen = gen_aarch64_uzp1v8hi; break; + case V4HImode: gen = gen_aarch64_uzp1v4hi; break; + case V4SImode: gen = gen_aarch64_uzp1v4si; break; + case V2SImode: gen = gen_aarch64_uzp1v2si; break; + case V2DImode: gen = gen_aarch64_uzp1v2di; break; + case V4SFmode: gen = gen_aarch64_uzp1v4sf; break; + case V2SFmode: gen = gen_aarch64_uzp1v2sf; break; + case V2DFmode: gen = gen_aarch64_uzp1v2df; break; + default: + return false; + } + } + + emit_insn (gen (out, in0, in1)); + return true; +} + +/* Recognize patterns suitable for the ZIP instructions. D->perm[0] must be 0 or nelt / 2 (kept in high); position 2 * I must hold I + high and position 2 * I + 1 must hold I + high + nelt, both masked with nelt - 1 when D->one_vector_p is set and with 2 * nelt - 1 otherwise. Return false when the selector does not match, when the element size is above 8 bytes, or when the vector mode has no generator in the switch below. Return true on a match; unless D->testing_p is set, a zip1 or zip2 insn (chosen by high after the big-endian correction) is also emitted into D->target. */ +static bool +aarch64_evpc_zip (struct expand_vec_perm_d *d) +{ + unsigned int i, high, mask, nelt = d->nelt; + rtx out, in0, in1, x; + rtx (*gen) (rtx, rtx, rtx); + enum machine_mode vmode = d->vmode; + + if (GET_MODE_UNIT_SIZE (vmode) > 8) + return false; + + /* Note that these are little-endian tests. + We correct for big-endian later. 
*/ + high = nelt / 2; + if (d->perm[0] == high) + /* Do Nothing: the first index is nelt / 2, so high keeps that value. */ + ; + else if (d->perm[0] == 0) + high = 0; + else + return false; + mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1); + + for (i = 0; i < nelt / 2; i++) + { + unsigned elt = (i + high) & mask; + if (d->perm[i * 2] != elt) + return false; + elt = (elt + nelt) & mask; + if (d->perm[i * 2 + 1] != elt) + return false; + } + + /* Success! */ + if (d->testing_p) + return true; + + in0 = d->op0; + in1 = d->op1; + if (BYTES_BIG_ENDIAN) + { /* Big-endian: swap the two inputs and invert the high/low choice; from here on high is only used as a zero or nonzero flag. */ + x = in0, in0 = in1, in1 = x; + high = !high; + } + out = d->target; + + if (high) + { + switch (vmode) + { + case V16QImode: gen = gen_aarch64_zip2v16qi; break; + case V8QImode: gen = gen_aarch64_zip2v8qi; break; + case V8HImode: gen = gen_aarch64_zip2v8hi; break; + case V4HImode: gen = gen_aarch64_zip2v4hi; break; + case V4SImode: gen = gen_aarch64_zip2v4si; break; + case V2SImode: gen = gen_aarch64_zip2v2si; break; + case V2DImode: gen = gen_aarch64_zip2v2di; break; + case V4SFmode: gen = gen_aarch64_zip2v4sf; break; + case V2SFmode: gen = gen_aarch64_zip2v2sf; break; + case V2DFmode: gen = gen_aarch64_zip2v2df; break; + default: + return false; + } + } + else + { + switch (vmode) + { + case V16QImode: gen = gen_aarch64_zip1v16qi; break; + case V8QImode: gen = gen_aarch64_zip1v8qi; break; + case V8HImode: gen = gen_aarch64_zip1v8hi; break; + case V4HImode: gen = gen_aarch64_zip1v4hi; break; + case V4SImode: gen = gen_aarch64_zip1v4si; break; + case V2SImode: gen = gen_aarch64_zip1v2si; break; + case V2DImode: gen = gen_aarch64_zip1v2di; break; + case V4SFmode: gen = gen_aarch64_zip1v4sf; break; + case V2SFmode: gen = gen_aarch64_zip1v2sf; break; + case V2DFmode: gen = gen_aarch64_zip1v2df; break; + default: + return false; + } + } + + emit_insn (gen (out, in0, in1)); + return true; +} + static bool aarch64_evpc_tbl (struct expand_vec_perm_d *d) { @@ -6969,7 +7224,15 @@ aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d) } if (TARGET_SIMD) - return 
aarch64_evpc_tbl (d); + { /* Try the zip, uzp and trn recognizers in that order; fall back to aarch64_evpc_tbl only when none of them accepts D. */ + if (aarch64_evpc_zip (d)) + return true; + else if (aarch64_evpc_uzp (d)) + return true; + else if (aarch64_evpc_trn (d)) + return true; + return aarch64_evpc_tbl (d); + } return false; } diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index 7cd4cef0eef..0eb30f06c04 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -230,6 +230,12 @@ UNSPEC_BSL ; Used in aarch64-simd.md. UNSPEC_TBL ; Used in vector permute patterns. UNSPEC_CONCAT ; Used in vector permute patterns. + UNSPEC_ZIP1 ; Used in vector permute patterns. + UNSPEC_ZIP2 ; Used in vector permute patterns. + UNSPEC_UZP1 ; Used in vector permute patterns. + UNSPEC_UZP2 ; Used in vector permute patterns. + UNSPEC_TRN1 ; Used in vector permute patterns. + UNSPEC_TRN2 ; Used in vector permute patterns. ]) ;; ------------------------------------------------------------------- @@ -652,6 +658,9 @@ (define_int_iterator VCMP_U [UNSPEC_CMHS UNSPEC_CMHI UNSPEC_CMTST]) +(define_int_iterator PERMUTE [UNSPEC_ZIP1 UNSPEC_ZIP2 + UNSPEC_TRN1 UNSPEC_TRN2 + UNSPEC_UZP1 UNSPEC_UZP2]) (define_int_iterator FRINT [UNSPEC_FRINTZ UNSPEC_FRINTP UNSPEC_FRINTM UNSPEC_FRINTI UNSPEC_FRINTX UNSPEC_FRINTA]) @@ -757,3 +766,10 @@ (define_int_attr fcvt_pattern [(UNSPEC_FRINTZ "btrunc") (UNSPEC_FRINTA "round") (UNSPEC_FRINTP "ceil") (UNSPEC_FRINTM "floor")]) +(define_int_attr perm_insn [(UNSPEC_ZIP1 "zip") (UNSPEC_ZIP2 "zip") + (UNSPEC_TRN1 "trn") (UNSPEC_TRN2 "trn") + (UNSPEC_UZP1 "uzp") (UNSPEC_UZP2 "uzp")]) + +(define_int_attr perm_hilo [(UNSPEC_ZIP1 "1") (UNSPEC_ZIP2 "2") + (UNSPEC_TRN1 "1") (UNSPEC_TRN2 "2") + (UNSPEC_UZP1 "1") (UNSPEC_UZP2 "2")]) |