-rwxr-xr-x  config.sub                          |    9
-rw-r--r--  gcc/combine.c                       |   12
-rw-r--r--  gcc/common/config/gcn/gcn-common.c  |   29
-rw-r--r--  gcc/config.gcc                      |   27
-rw-r--r--  gcc/config/gcn/constraints.md       |  129
-rw-r--r--  gcc/config/gcn/gcn-builtins.def     |   69
-rw-r--r--  gcc/config/gcn/gcn-c.c              |   24
-rw-r--r--  gcc/config/gcn/gcn-hsa.h            |   54
-rw-r--r--  gcc/config/gcn/gcn-modes.def        |   84
-rw-r--r--  gcc/config/gcn/gcn-opts.h           |   27
-rw-r--r--  gcc/config/gcn/gcn-protos.h         |   87
-rw-r--r--  gcc/config/gcn/gcn-valu.md          |  982
-rw-r--r--  gcc/config/gcn/gcn.c                | 2905
-rw-r--r--  gcc/config/gcn/gcn.h                |  718
-rw-r--r--  gcc/config/gcn/gcn.md               | 1068
-rw-r--r--  gcc/config/gcn/gcn.opt              |   40
-rw-r--r--  gcc/config/gcn/predicates.md        |  167
-rw-r--r--  gcc/config/gcn/t-gcn-elf            |   21
-rw-r--r--  gcc/emit-rtl.c                      |    3
-rw-r--r--  gcc/explow.c                        |    9
-rw-r--r--  gcc/expr.c                          |    3
-rw-r--r--  gcc/ira-costs.c                     |    8
-rw-r--r--  gcc/ira.c                           |    6
-rw-r--r--  gcc/lra-constraints.c               |   10
-rw-r--r--  gcc/lra-int.h                       |    6
-rw-r--r--  gcc/print-rtl.c                     |   15
-rw-r--r--  gcc/recog.h                         |   26
-rw-r--r--  gcc/reload1.c                       |    2
-rw-r--r--  gcc/simplify-rtx.c                  |   81
29 files changed, 6590 insertions(+), 31 deletions(-)
diff --git a/config.sub b/config.sub
index 62b82599d98..94cc4d93d90 100755
--- a/config.sub
+++ b/config.sub
@@ -263,6 +263,7 @@ case $basic_machine in
| fido | fr30 | frv | ft32 \
| h8300 | h8500 | hppa | hppa1.[01] | hppa2.0 | hppa2.0[nw] | hppa64 \
| hexagon \
+ | amdgcn \
| i370 | i860 | i960 | ia64 \
| ip2k | iq2000 \
| k1om \
@@ -671,6 +672,9 @@ case $basic_machine in
fx2800)
basic_machine=i860-alliant
;;
+ amdgcn)
+ basic_machine=amdgcn-unknown
+ ;;
genix)
basic_machine=ns32k-ns
;;
@@ -1543,6 +1547,8 @@ case $os in
;;
-ios)
;;
+ -amdhsa)
+ ;;
-none)
;;
*)
@@ -1571,6 +1577,9 @@ case $basic_machine in
spu-*)
os=-elf
;;
+ amdgcn-*)
+ os=-amdhsa
+ ;;
*-acorn)
os=-riscix1.2
;;
diff --git a/gcc/combine.c b/gcc/combine.c
index 8dc62b57266..e47a4f20a62 100644
--- a/gcc/combine.c
+++ b/gcc/combine.c
@@ -8441,7 +8441,12 @@ gen_lowpart_or_truncate (machine_mode mode, rtx x)
{
/* Bit-cast X into an integer mode. */
if (!SCALAR_INT_MODE_P (GET_MODE (x)))
- x = gen_lowpart (int_mode_for_mode (GET_MODE (x)), x);
+ {
+ enum machine_mode imode = int_mode_for_mode (GET_MODE (x));
+ if (imode == BLKmode)
+ return gen_rtx_CLOBBER (mode, const0_rtx);
+ x = gen_lowpart (imode, x);
+ }
x = simplify_gen_unary (TRUNCATE, int_mode_for_mode (mode),
x, GET_MODE (x));
}
@@ -11446,6 +11451,11 @@ gen_lowpart_for_combine (machine_mode omode, rtx x)
if (omode == imode)
return x;
+ /* This can happen when there is no integer mode corresponding
+    to the size of a vector mode.  */
+ if (omode == BLKmode)
+ goto fail;
+
/* We can only support MODE being wider than a word if X is a
constant integer or has a mode the same size. */
if (GET_MODE_SIZE (omode) > UNITS_PER_WORD
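Both combine.c hunks guard the same failure: for the 64-lane vector modes this
port introduces there is no scalar integer mode of equal size, so
int_mode_for_mode returns BLKmode, which the old code passed straight on.
A worked illustration (ours, using this patch's modes):

    /* V64SImode is 64 lanes x 4 bytes = 256 bytes; GCC has no scalar
       integer mode that wide, so the lookup fails.  */
    machine_mode imode = int_mode_for_mode (V64SImode);
    gcc_assert (imode == BLKmode);   /* hence the new early exits above */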
diff --git a/gcc/common/config/gcn/gcn-common.c b/gcc/common/config/gcn/gcn-common.c
new file mode 100644
index 00000000000..6cafc371b26
--- /dev/null
+++ b/gcc/common/config/gcn/gcn-common.c
@@ -0,0 +1,29 @@
+/* Common hooks for GCN
+ Copyright (C) 2016-2017 Free Software Foundation, Inc.
+
+ This file is free software; you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your option)
+ any later version.
+
+ This file is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "common/common-target.h"
+#include "common/common-target-def.h"
+#include "opts.h"
+#include "flags.h"
+#include "params.h"
+
+
+struct gcc_targetm_common targetm_common = TARGETM_COMMON_INITIALIZER;
diff --git a/gcc/config.gcc b/gcc/config.gcc
index fdf4cb845ae..215784030e3 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -311,6 +311,9 @@ alpha*-*-*)
cpu_type=alpha
extra_options="${extra_options} g.opt"
;;
+amdgcn*)
+ cpu_type=gcn
+ ;;
am33_2.0-*-linux*)
cpu_type=mn10300
;;
@@ -1258,6 +1261,12 @@ ft32-*-elf)
tm_file="dbxelf.h elfos.h newlib-stdint.h ${tm_file}"
tmake_file="${tmake_file} ft32/t-ft32"
;;
+amdgcn-*-amdhsa)
+ tm_file="dbxelf.h elfos.h gcn/gcn-hsa.h gcn/gcn.h newlib-stdint.h"
+ tmake_file="gcn/t-gcn-hsa"
+ native_system_header_dir=/include
+ extra_modes=gcn/gcn-modes.def
+ ;;
moxie-*-elf)
gas=yes
gnu_ld=yes
@@ -3928,6 +3937,24 @@ case "${target}" in
esac
;;
+ amdgcn-*-*)
+ supported_defaults="arch tune"
+
+ for which in arch tune; do
+ eval "val=\$with_$which"
+ case ${val} in
+ "" | carrizo | fiji)
+ # OK
+ ;;
+ *)
+ echo "Unknown cpu used in --with-$which=$val." 1>&2
+ exit 1
+ ;;
+ esac
+ done
+ [ "x$with_arch" = x ] && with_arch=carrizo
+ ;;
+
hppa*-*-*)
supported_defaults="arch schedule"
diff --git a/gcc/config/gcn/constraints.md b/gcc/config/gcn/constraints.md
new file mode 100644
index 00000000000..5ae42313652
--- /dev/null
+++ b/gcc/config/gcn/constraints.md
@@ -0,0 +1,129 @@
+;; Constraint definitions for GCN.
+;; Copyright (C) 2016-2017 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_constraint "I"
+ "Inline integer constant"
+ (and (match_code "const_int")
+ (match_test "ival >= -16 && ival <= 64")))
+
+(define_constraint "J"
+ "Signed integer 16-bit inline constant"
+ (and (match_code "const_int")
+ (match_test "((unsigned HOST_WIDE_INT) ival + 0x8000) < 0x10000")))
+
+(define_constraint "K"
+ "Integer 32-bit constant"
+ (and (match_code "const_int")
+ (match_test "trunc_int_for_mode (ival, SImode) == ival")))
+
+(define_constraint "O"
+ "Integer one constant"
+ (and (match_code "const_int")
+ (match_test "ival == 1")))
+
+(define_constraint "G"
+ "Inline floating point constant."
+ (and (match_code "const_double")
+ (match_test "gcn_inline_fp_constant_p (op, false) > 0")))
+
+(define_constraint "H"
+ "floating point constant representable as inline or 32bit immediate."
+ (and (match_code "const_double")
+ (match_test "gcn_fp_constant_p (op, false) > 0")))
+
+(define_constraint "A"
+ "Inline immediate parameter"
+ (and (match_code "const_int,const_double,const_vector")
+ (match_test "gcn_inline_constant_p (op)")))
+
+(define_constraint "B"
+ "Inline immediate parameter"
+ (and (match_code "const_int,const_double,const_vector")
+ (match_test "gcn_constant_p (op)")))
+
+(define_constraint "U"
+ "unspecified value"
+ (match_code "unspec"))
+
+(define_register_constraint "v" "VGPR_REGS"
+ "VGPR registers")
+
+(define_register_constraint "Sg" "SGPR_REGS"
+ "SGPR registers")
+
+(define_register_constraint "SD" "SGPR_DST_REGS"
+ "registers useable as a destination of scalar operation")
+
+(define_register_constraint "SS" "SGPR_SRC_REGS"
+ "registers useable as a source of scalar operation")
+
+(define_register_constraint "Sm" "SGPR_MEM_SRC_REGS"
+ "registers useable as a source of scalar memory operation")
+
+(define_register_constraint "Sv" "SGPR_VOP3A_SRC_REGS"
+ "registers useable as a source of VOP3A instruction")
+
+(define_register_constraint "ca" "ALL_CONDITIONAL_REGS"
+ "SCC VCCZ or EXECZ")
+
+(define_register_constraint "cs" "SCC_CONDITIONAL_REG"
+ "SCC")
+
+(define_register_constraint "cv" "VCCZ_CONDITIONAL_REG"
+ "VCCZ")
+
+(define_register_constraint "cV" "VCC_CONDITIONAL_REG"
+ "VCC")
+
+(define_register_constraint "ce" "EXECZ_CONDITIONAL_REG"
+ "EXECZ")
+
+(define_register_constraint "e" "EXEC_MASK_REG"
+ "EXEC")
+
+(define_memory_constraint "RB"
+ "Buffer memory address to scratch memory."
+ (and (match_code "mem")
+ (match_test "MEM_ADDR_SPACE (op) == ADDR_SPACE_SCRATCH")))
+
+(define_memory_constraint "RF"
+ "Buffer memory address to flat memory."
+ (and (match_code "mem")
+ (match_test "MEM_ADDR_SPACE (op) == ADDR_SPACE_FLAT")))
+
+(define_memory_constraint "RS"
+ "Buffer memory address to scalar flat memory."
+ (and (match_code "mem")
+ (match_test "MEM_ADDR_SPACE (op) == ADDR_SPACE_SCALAR_FLAT")))
+
+(define_memory_constraint "RL"
+ "Buffer memory address to LDS memory."
+ (and (match_code "mem")
+ (match_test "MEM_ADDR_SPACE (op) == ADDR_SPACE_LDS")))
+
+(define_memory_constraint "RG"
+ "Buffer memory address to GDS memory."
+ (and (match_code "mem")
+ (match_test "MEM_ADDR_SPACE (op) == ADDR_SPACE_GDS")))
+
+(define_memory_constraint "RD"
+ "Buffer memory address to GDS or LDS memory."
+ (and (match_code "mem")
+ (ior (match_test "MEM_ADDR_SPACE (op) == ADDR_SPACE_GDS")
+ (match_test "MEM_ADDR_SPACE (op) == ADDR_SPACE_LDS"))))
diff --git a/gcc/config/gcn/gcn-builtins.def b/gcc/config/gcn/gcn-builtins.def
new file mode 100644
index 00000000000..39a883f9e53
--- /dev/null
+++ b/gcc/config/gcn/gcn-builtins.def
@@ -0,0 +1,69 @@
+/* Copyright (C) 2016-2017 Free Software Foundation, Inc.
+
+ This file is free software; you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your option)
+ any later version.
+
+ This file is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+/* The first argument to these macros is the return type of the builtin,
+ the rest are arguments of the builtin. */
+#define _A1(a) {a, GCN_BTI_END_OF_PARAMS}
+#define _A2(a,b) {a, b, GCN_BTI_END_OF_PARAMS}
+#define _A3(a,b,c) {a, b, c, GCN_BTI_END_OF_PARAMS}
+#define _A4(a,b,c,d) {a, b, c, d, GCN_BTI_END_OF_PARAMS}
+#define _A5(a,b,c,d,e) {a, b, c, d, e, GCN_BTI_END_OF_PARAMS}
+
+DEF_BUILTIN (FLAT_LOAD_INT32, 1 /*CODE_FOR_flat_load_v64si*/,
+ "flat_load_int32", B_INSN,
+ _A3 (GCN_BTI_V64SI, GCN_BTI_EXEC, GCN_BTI_V64SI),
+ gcn_expand_builtin_1)
+
+DEF_BUILTIN (FLAT_LOAD_PTR_INT32, 2 /*CODE_FOR_flat_load_ptr_v64si */,
+ "flat_load_ptr_int32", B_INSN,
+ _A4 (GCN_BTI_V64SI, GCN_BTI_EXEC, GCN_BTI_SIPTR, GCN_BTI_V64SI),
+ gcn_expand_builtin_1)
+
+DEF_BUILTIN (FLAT_STORE_PTR_INT32, 3 /*CODE_FOR_flat_store_ptr_v64si */,
+ "flat_store_ptr_int32", B_INSN,
+ _A5 (GCN_BTI_VOID, GCN_BTI_EXEC, GCN_BTI_SIPTR, GCN_BTI_V64SI,
+ GCN_BTI_V64SI),
+ gcn_expand_builtin_1)
+
+DEF_BUILTIN (FLAT_LOAD_PTR_FLOAT, 2 /*CODE_FOR_flat_load_ptr_v64sf */,
+ "flat_load_ptr_float", B_INSN,
+ _A4 (GCN_BTI_V64SF, GCN_BTI_EXEC, GCN_BTI_SFPTR, GCN_BTI_V64SI),
+ gcn_expand_builtin_1)
+
+DEF_BUILTIN (FLAT_STORE_PTR_FLOAT, 3 /*CODE_FOR_flat_store_ptr_v64sf */,
+ "flat_store_ptr_float", B_INSN,
+ _A5 (GCN_BTI_VOID, GCN_BTI_EXEC, GCN_BTI_SFPTR, GCN_BTI_V64SI,
+ GCN_BTI_V64SF),
+ gcn_expand_builtin_1)
+
+/* DEF_BUILTIN_BINOP_INT_FP creates many variants of a builtin function for a
+   given operation.  The first argument forms the base of the identifier of a
+   particular builtin, the second is used to form the name of the pattern
+   used to expand it, and the third is used to create the user-visible
+   builtin identifier.  */
+
+DEF_BUILTIN_BINOP_INT_FP (ADD, add, "add")
+DEF_BUILTIN_BINOP_INT_FP (SUB, sub, "sub")
+
+DEF_BUILTIN_BINOP_INT_FP (AND, and, "and")
+DEF_BUILTIN_BINOP_INT_FP (IOR, ior, "or")
+DEF_BUILTIN_BINOP_INT_FP (XOR, xor, "xor")
+
+#undef _A1
+#undef _A2
+#undef _A3
+#undef _A4
+#undef _A5
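The DEF_BUILTIN_BINOP_INT_FP instantiations rely on a macro defined by the
file that includes gcn-builtins.def (gcn.c, outside this hunk).  As a
hypothetical sketch of what one instantiation might expand to, reusing the
entry format above (the pattern name matches addv64si3_vector from
gcn-valu.md; the real expansion may differ):

    DEF_BUILTIN (ADD_V64SI, CODE_FOR_addv64si3_vector, "addv64si3", B_INSN,
                 _A4 (GCN_BTI_V64SI, GCN_BTI_EXEC, GCN_BTI_V64SI, GCN_BTI_V64SI),
                 gcn_expand_builtin_1)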
diff --git a/gcc/config/gcn/gcn-c.c b/gcc/config/gcn/gcn-c.c
new file mode 100644
index 00000000000..039060b1134
--- /dev/null
+++ b/gcc/config/gcn/gcn-c.c
@@ -0,0 +1,24 @@
+/* Copyright (C) 2016-2017 Free Software Foundation, Inc.
+
+ This file is free software; you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your option)
+ any later version.
+
+ This file is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "target.h"
+#include "c-family/c-common.h"
+#include "stringpool.h"
+#include "langhooks.h"
+
diff --git a/gcc/config/gcn/gcn-hsa.h b/gcc/config/gcn/gcn-hsa.h
new file mode 100644
index 00000000000..ef05db98d80
--- /dev/null
+++ b/gcc/config/gcn/gcn-hsa.h
@@ -0,0 +1,54 @@
+/* Copyright (C) 2016-2017 Free Software Foundation, Inc.
+
+ This file is free software; you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your option)
+ any later version.
+
+ This file is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef OBJECT_FORMAT_ELF
+ #error elf.h included before elfos.h
+#endif
+
+#define TEXT_SECTION_NAME ".AMDGPU.config"
+
+#define BSS_SECTION_ASM_OP "\t.section .bss"
+
+#define ASM_OUTPUT_ALIGNED_BSS(FILE, DECL, NAME, SIZE, ALIGN) \
+ asm_output_aligned_bss (FILE, DECL, NAME, SIZE, ALIGN)
+
+
+#undef STANDARD_STARTFILE_PREFIX_2
+#define STANDARD_STARTFILE_PREFIX_2 ""
+
+#undef LOCAL_INCLUDE_DIR
+
+#undef STARTFILE_SPEC
+#define STARTFILE_SPEC ""
+
+#undef ENDFILE_SPEC
+#define ENDFILE_SPEC ""
+
+#define PREFERRED_DEBUGGING_TYPE DWARF2_DEBUG
+
+#define DWARF2_DEBUGGING_INFO 1
+#define DWARF2_ASM_LINE_DEBUG_INFO 1
+
+#define SET_ASM_OP "\t.set\t"
+
+#undef TARGET_ASM_NAMED_SECTION
+#define TARGET_ASM_NAMED_SECTION default_elf_asm_named_section
+
+#define EH_FRAME_THROUGH_COLLECT2 1
+
+#define LINK_SPEC ""
+
+#define LIB_SPEC ""
diff --git a/gcc/config/gcn/gcn-modes.def b/gcc/config/gcn/gcn-modes.def
new file mode 100644
index 00000000000..baeb23f5e2f
--- /dev/null
+++ b/gcc/config/gcn/gcn-modes.def
@@ -0,0 +1,84 @@
+/* Copyright (C) 2016-2017 Free Software Foundation, Inc.
+
+ This file is free software; you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your option)
+ any later version.
+
+ This file is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+/* Half-precision floating point */
+FLOAT_MODE (HF, 2, 0);
+/* FIXME: No idea what format it is. */
+ADJUST_FLOAT_FORMAT (HF, &ieee_half_format);
+
+/* Native vector modes. */
+VECTOR_MODE (INT, QI, 2); /* V2QI */
+VECTOR_MODE (INT, QI, 4); /* V4QI */
+VECTOR_MODE (INT, QI, 8); /* V8QI */
+VECTOR_MODE (INT, QI, 16); /* V16QI */
+VECTOR_MODE (INT, QI, 32); /* V32QI */
+VECTOR_MODE (INT, QI, 64); /* V64QI */
+VECTOR_MODE (INT, HI, 2); /* V2HI */
+VECTOR_MODE (INT, HI, 4); /* V4HI */
+VECTOR_MODE (INT, HI, 8); /* V8HI */
+VECTOR_MODE (INT, HI, 16); /* V16HI */
+VECTOR_MODE (INT, HI, 32); /* V32HI */
+VECTOR_MODE (INT, HI, 64); /* V64HI */
+VECTOR_MODE (INT, SI, 2); /* V2SI */
+VECTOR_MODE (INT, SI, 4); /* V4SI */
+VECTOR_MODE (INT, SI, 8); /* V8SI */
+VECTOR_MODE (INT, SI, 16); /* V16SI */
+VECTOR_MODE (INT, SI, 32); /* V32SI */
+VECTOR_MODE (INT, SI, 64); /* V64SI */
+VECTOR_MODE (INT, DI, 2); /* V2DI */
+VECTOR_MODE (INT, DI, 4); /* V4DI */
+VECTOR_MODE (INT, DI, 8); /* V8DI */
+VECTOR_MODE (INT, DI, 16); /* V16DI */
+VECTOR_MODE (INT, DI, 32); /* V32DI */
+VECTOR_MODE (INT, DI, 64); /* V64DI */
+VECTOR_MODE (INT, TI, 4); /* V4TI */
+VECTOR_MODE (INT, TI, 8); /* V8TI */
+VECTOR_MODE (INT, TI, 16); /* V16TI */
+VECTOR_MODE (INT, TI, 32); /* V32TI */
+VECTOR_MODE (INT, TI, 64); /* V64TI */
+VECTOR_MODE (FLOAT, HF, 2); /* V2HF */
+VECTOR_MODE (FLOAT, HF, 4); /* V4HF */
+VECTOR_MODE (FLOAT, HF, 8); /* V8HF */
+VECTOR_MODE (FLOAT, HF, 16); /* V16HF */
+VECTOR_MODE (FLOAT, HF, 32); /* V32HF */
+VECTOR_MODE (FLOAT, HF, 64); /* V64HF */
+VECTOR_MODE (FLOAT, SF, 2); /* V2SF */
+VECTOR_MODE (FLOAT, SF, 4); /* V4SF */
+VECTOR_MODE (FLOAT, SF, 8); /* V8SF */
+VECTOR_MODE (FLOAT, SF, 16); /* V16SF */
+VECTOR_MODE (FLOAT, SF, 32); /* V32SF */
+VECTOR_MODE (FLOAT, SF, 64); /* V64SF */
+VECTOR_MODE (FLOAT, DF, 2); /* V2DF */
+VECTOR_MODE (FLOAT, DF, 4); /* V4DF */
+VECTOR_MODE (FLOAT, DF, 8); /* V8DF */
+VECTOR_MODE (FLOAT, DF, 16); /* V16DF */
+VECTOR_MODE (FLOAT, DF, 32); /* V32DF */
+VECTOR_MODE (FLOAT, DF, 64); /* V64DF */
+
+/* Vector units handle reads independently and thus no large alignment
+   is needed.  */
+ADJUST_ALIGNMENT (V64QI, 1);
+ADJUST_ALIGNMENT (V64HI, 2);
+ADJUST_ALIGNMENT (V64SI, 4);
+ADJUST_ALIGNMENT (V64DI, 8);
+ADJUST_ALIGNMENT (V64TI, 16);
+ADJUST_ALIGNMENT (V64HF, 2);
+ADJUST_ALIGNMENT (V64SF, 4);
+ADJUST_ALIGNMENT (V64DF, 8);
+
+/* Register pairs, triples and quadruples.  */
+VECTOR_MODE (INT, SI, 3); /* V3SI */
+VECTOR_MODE (FLOAT, SF, 3); /* V3SF */
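The ADJUST_ALIGNMENT entries above give the 64-lane modes element alignment
rather than full-size alignment.  Concretely (a sketch, ours;
GET_MODE_ALIGNMENT reports bits):

    /* V64SImode occupies 64 * 4 = 256 bytes but only needs 4-byte
       alignment, since each lane is read independently.  */
    gcc_assert (GET_MODE_SIZE (V64SImode) == 256);
    gcc_assert (GET_MODE_ALIGNMENT (V64SImode) == 32);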
diff --git a/gcc/config/gcn/gcn-opts.h b/gcc/config/gcn/gcn-opts.h
new file mode 100644
index 00000000000..d0586d62f87
--- /dev/null
+++ b/gcc/config/gcn/gcn-opts.h
@@ -0,0 +1,27 @@
+/* Copyright (C) 2016-2017 Free Software Foundation, Inc.
+
+ This file is free software; you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your option)
+ any later version.
+
+ This file is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef GCN_OPTS_H
+#define GCN_OPTS_H
+
+/* Which processor to generate code or schedule for. */
+enum processor_type
+{
+ PROCESSOR_CARRIZO,
+ PROCESSOR_FIJI
+};
+
+#endif
diff --git a/gcc/config/gcn/gcn-protos.h b/gcc/config/gcn/gcn-protos.h
new file mode 100644
index 00000000000..e5dd5280c34
--- /dev/null
+++ b/gcc/config/gcn/gcn-protos.h
@@ -0,0 +1,87 @@
+/* Copyright (C) 2016-2017 Free Software Foundation, Inc.
+
+ This file is free software; you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your option)
+ any later version.
+
+ This file is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _GCN_PROTOS_
+#define _GCN_PROTOS_
+
+extern bool gcn_hard_regno_mode_ok (int regno, machine_mode mode);
+extern int gcn_hard_regno_nregs (int regno, enum machine_mode mode);
+extern enum reg_class gcn_regno_reg_class (int regno);
+extern bool gcn_cannot_change_mode_class (machine_mode, machine_mode, int);
+extern int gcn_inline_fp_constant_p (rtx, bool);
+extern bool gcn_fp_constant_p (rtx, bool);
+extern bool gcn_inline_constant_p (rtx);
+extern bool gcn_constant_p (rtx);
+extern bool gcn_vgpr_move_p (rtx, rtx);
+extern bool gcn_sgpr_move_p (rtx, rtx);
+
+extern bool gcn_regno_mode_code_ok_for_base_p
+ (int, machine_mode, addr_space_t, int, int);
+extern reg_class gcn_mode_code_base_reg_class
+ (machine_mode, addr_space_t, int, int);
+extern bool regno_ok_for_index_p (int);
+extern void print_operand_address (FILE * file, register rtx addr);
+extern void print_operand (FILE * file, rtx x, int code);
+
+extern rtx gcn_operand_part (machine_mode, rtx, int);
+extern rtx gcn_operand_doublepart (machine_mode, rtx, int);
+extern void gcn_split_operands (machine_mode, rtx *, int, int);
+extern bool gcn_can_split_p (machine_mode, rtx);
+extern bool gcn_can_split_operands_p (machine_mode, rtx *, int);
+extern rtx gcn_vec_constant (machine_mode, int);
+extern rtx gcn_vec_constant (machine_mode, rtx);
+extern bool gcn_expand_mov (machine_mode, rtx, rtx);
+extern void gcn_expand_vector_init (rtx, rtx);
+
+extern void gcn_hsa_declare_function_name (FILE *file,
+ const char *name, tree decl);
+
+extern rtx gcn_gen_undef (enum machine_mode);
+extern rtx gcn_scalar_exec ();
+extern rtx gcn_default_exec ();
+extern rtx gcn_full_exec ();
+extern rtx gcn_full_exec_reg ();
+
+extern void gcn_expand_prologue ();
+
+extern int gcn_regmove_natural_size (enum machine_mode);
+extern unsigned gcn_frame_pointer_regnum ();
+
+#ifdef TREE_CODE
+extern void gcn_init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree, int);
+#endif
+
+/* Return true if MODE is valid for 1 VGPR register. */
+
+static inline bool
+vgpr_1reg_mode_p (enum machine_mode mode)
+{
+ return mode == SImode || mode == SFmode || mode == HImode
+ /*|| mode == V32BImode*/
+ || mode == V64HImode || mode == V64SImode
+ || mode == V64HFmode || mode == V64SFmode || mode == BImode;
+}
+
+/* Return true if MODE is valid for 1 SGPR register. */
+
+static inline bool
+sgpr_1reg_mode_p (enum machine_mode mode)
+{
+ return mode == SImode || mode == SFmode || mode == HImode || mode == BImode
+ /*|| mode == V32BImode*/;
+}
+
+#endif
diff --git a/gcc/config/gcn/gcn-valu.md b/gcc/config/gcn/gcn-valu.md
new file mode 100644
index 00000000000..894340a46c5
--- /dev/null
+++ b/gcc/config/gcn/gcn-valu.md
@@ -0,0 +1,982 @@
+;; Copyright (C) 2016-2017 Free Software Foundation, Inc.
+
+;; This file is free software; you can redistribute it and/or modify it under
+;; the terms of the GNU General Public License as published by the Free
+;; Software Foundation; either version 3 of the License, or (at your option)
+;; any later version.
+
+;; This file is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+;; for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; -------------------------------------------------------------------------
+;; Moves
+;; -------------------------------------------------------------------------
+
+; Vector modes for one vector register
+(define_mode_iterator VEC_1REG_MODE
+ [V64QI V64HI V64SI V64HF V64SF])
+
+(define_mode_iterator VEC_1REG_INT_MODE
+ [V64QI V64HI V64SI])
+
+; Vector modes for two vector registers
+(define_mode_iterator VEC_2REG_MODE
+ [V64DI V64DF])
+
+; All of above
+(define_mode_iterator VEC_REG_MODE
+ [V64QI V64HI V64SI V64HF V64SF ; Single reg
+ V64DI V64DF]) ; Double reg
+
+; Modes supporting integer vector operations
+(define_mode_iterator V_INT_MODE [SI V64SI])
+
+; Modes we can perform flat memory operations in.
+; FIXME: We can also do 96-bit, 128-bit and 256-bit loads into multiple
+; registers.  Eventually add modes for this.
+(define_mode_iterator VEC_FLAT_MODE
+ [V64QI V64HI V64SI V64HF V64SF ; Single reg
+ V64DI V64DF]) ; Double reg
+
+; Modes we can perform scalar flat memory operations in.
+(define_mode_iterator SCALAR_FLAT_MODE
+ [BI QI HI HF SI SF ; single regs
+ DI DF V2SI V2SF ; two regs
+ V3SI V3SF ; three regs
+ TI V4SI V4SF V2DI V2DF]) ; four regs
+
+;; Mapping of full vector modes to shorter vectors
+(define_mode_attr scalar_mode
+ [(V64QI "QI") (V64HI "HI") (V64SI "SI")
+ (V64HF "HF") (V64SF "SF") (V64DI "DI") (V64DF "DF")])
+
+(define_mode_attr v2_mode
+ [(V64QI "V2QI") (V64HI "V2HI") (V64SI "V2SI")
+ (V64HF "V2HF") (V64SF "V2SF") (V64DI "V2DI") (V64DF "V2DF")])
+
+(define_mode_attr v4_mode
+ [(V64QI "V4QI") (V64HI "V4HI") (V64SI "V4SI")
+ (V64HF "V4HF") (V64SF "V4SF") (V64DI "V4DI") (V64DF "V4DF")])
+
+(define_mode_attr v8_mode
+ [(V64QI "V8QI") (V64HI "V8HI") (V64SI "V8SI")
+ (V64HF "V8HF") (V64SF "V8SF") (V64DI "V8DI") (V64DF "V8DF")])
+
+(define_mode_attr v16_mode
+ [(V64QI "V16QI") (V64HI "V16HI") (V64SI "V16SI")
+ (V64HF "V16HF") (V64SF "V16SF") (V64DI "V16DI") (V64DF "V16DF")])
+
+(define_mode_attr v32_mode
+ [(V64QI "V32QI") (V64HI "V32HI") (V64SI "V32SI")
+ (V64HF "V32HF") (V64SF "V32SF") (V64DI "V32DI") (V64DF "V32DF")])
+
+(define_insn "*simple_buffer_mov<mode>"
+ [(set (match_operand:SCALAR_FLAT_MODE 0 "nonimmediate_operand" "=RBm,v")
+ (match_operand:SCALAR_FLAT_MODE 1 "general_operand" "v, RBm"))
+ (use (match_operand:TI 2 "register_operand" "Sg, Sg"))
+ (use (match_operand:DI 3 "gcn_exec_reg_operand" "e, e"))]
+ "memory_operand (operands[0], VOIDmode) != memory_operand (operands[1], VOIDmode)"
+ "@
+ buffer_store%s1 %1, off, %2, %A0\n\ts_waitcnt vmcnt(0) expcnt(0)
+ buffer_load%s0 %0, off, %2, %A1\n\ts_waitcnt vmcnt(0)"
+ [(set_attr "type" "flat")
+ (set_attr "mode" "<MODE>")])
+
+;; This eternal ugliness makes LRA sort-of work.
+(define_insn_and_split "*ugly_reload_mov<mode>"
+ [(set (match_operand:SCALAR_FLAT_MODE 0 "nonimmediate_operand" "=RBm,v, ^RBm,^v, SD, SD, v, v , ^v, ^v , v, Sm, RDRF,v")
+ (match_operand:SCALAR_FLAT_MODE 1 "general_operand" "v, RBm, v, RBm, SSB,SSn,vn, SS, vn, SS, SS, vSS,v ,RDRF"))
+ (use (match_operand:TI 2 "register_operand" "Sg, Sg, Sg, Sg, Sg, Sg, Sg, Sg, Sg, Sg, Sg, Sg, Sg ,Sg"))
+ (use (match_operand:DI 3 "gcn_exec_operand" "e ,e ,Sgn, Sgn, SSn,SSn,e ,e ,Sgn, Sgn,SSO,SSO,e ,e"))
+ (clobber (match_operand:DI 4 "register_operand" "=&Sg,Sg,Sg,Sg,Sg,Sg,Sg,Sg,Sg,Sg,Sg,Sg,Sg,Sg"))]
+ "(!immediate_operand (operands[1], VOIDmode) || register_operand (operands[0], VOIDmode))
+ && (!memory_operand (operands[0], VOIDmode) || !memory_operand (operands[1], VOIDmode))"
+ "#"
+ "!memory_operand (operands[0],<MODE>mode) && !memory_operand (operands[1],<MODE>mode)
+ && !gcn_vgpr_register_operand (operands[0], <MODE>mode)
+ && !gcn_vgpr_register_operand (operands[1], <MODE>mode)"
+ [(set (match_dup 0) (match_dup 1))]
+{}
+ [(set_attr "type" "flat")
+ (set_attr "mode" "<MODE>")])
+
+(define_split
+ [(set (match_operand:SCALAR_FLAT_MODE 0 "nonimmediate_operand")
+ (match_operand:SCALAR_FLAT_MODE 1 "general_operand"))
+ (use (match_operand:TI 2 "register_operand"))
+ (use (match_operand:DI 3 "gcn_exec_operand"))
+ (clobber (match_scratch:DI 4 ""))]
+ "REG_P (operands[3]) && REGNO (operands[3]) == EXEC_REG
+ && (memory_operand (operands[0], VOIDmode) || memory_operand (operands[1], VOIDmode))"
+ [(parallel [(set (match_dup 0) (match_dup 1))
+ (use (match_dup 2))
+ (use (match_dup 3))])])
+
+(define_split
+ [(set (match_operand:SCALAR_FLAT_MODE 0 "nonimmediate_operand")
+ (match_operand:SCALAR_FLAT_MODE 1 "general_operand"))
+ (use (match_operand:TI 2 "register_operand"))
+ (use (match_operand:DI 3 "gcn_exec_operand"))
+ (clobber (match_scratch:DI 4 ""))]
+ "REG_P (operands[3]) && REGNO (operands[3]) == EXEC_REG"
+ [(parallel [(set (match_dup 0) (match_dup 1))
+ (use (match_dup 3))])])
+
+(define_split
+ [(set (match_operand:SCALAR_FLAT_MODE 0 "nonimmediate_operand")
+ (match_operand:SCALAR_FLAT_MODE 1 "general_operand"))
+ (use (match_operand:TI 2 "register_operand"))
+ (use (match_operand:DI 3 "gcn_exec_operand"))
+ (clobber (match_operand:DI 4 "register_operand"))]
+ "(memory_operand (operands[0], VOIDmode) || memory_operand (operands[1], VOIDmode))"
+ [(set (match_dup 4) (reg:DI EXEC_REG))
+ (set (reg:DI EXEC_REG) (match_dup 3))
+ (parallel [(set (match_dup 0) (match_dup 1))
+ (use (match_dup 2))
+ (use (reg:DI EXEC_REG))])
+ (set (reg:DI EXEC_REG) (match_dup 4))])
+
+(define_split
+ [(set (match_operand:SCALAR_FLAT_MODE 0 "nonimmediate_operand")
+ (match_operand:SCALAR_FLAT_MODE 1 "general_operand"))
+ (use (match_operand:TI 2 "register_operand"))
+ (use (match_operand:DI 3 "gcn_exec_operand"))
+ (clobber (match_operand:DI 4 "register_operand"))]
+ ""
+ [(set (match_dup 4) (reg:DI EXEC_REG))
+ (set (reg:DI EXEC_REG) (match_dup 3))
+ (parallel [(set (match_dup 0) (match_dup 1))
+ (use (reg:DI EXEC_REG))])
+ (set (reg:DI EXEC_REG) (match_dup 4))])
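The last two splits bracket the move with an EXEC save/set/restore so that it
runs under the requested lane mask.  In plain C the emitted sequence behaves
like this self-contained model (ours; exec stands for the EXEC hard register):

    #include <stdint.h>

    static uint64_t exec;   /* stands in for the 64-bit EXEC lane mask */

    static void
    masked_move_under (uint64_t mask, uint32_t dst[64], const uint32_t src[64])
    {
      uint64_t saved = exec;         /* (set (match_dup 4) (reg:DI EXEC_REG)) */
      exec = mask;                   /* (set (reg:DI EXEC_REG) (match_dup 3)) */
      for (int i = 0; i < 64; i++)   /* the inner, lane-masked move */
        if ((exec >> i) & 1)
          dst[i] = src[i];
      exec = saved;                  /* restore the caller's mask */
    }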
+
+(define_insn "*mov<mode>"
+ [(set (match_operand:VEC_1REG_MODE 0 "nonimmediate_operand" "=v,RD,RF,v ,v")
+ (vec_merge:VEC_1REG_MODE
+ (match_operand:VEC_1REG_MODE 1 "gcn_load_operand" "vB,v ,v,RD,RF")
+ (match_operand:VEC_1REG_MODE 3 "gcn_register_or_unspec_operand" "0U,0U,0U,0U,0U")
+ (match_operand:DI 2 "gcn_exec_reg_operand" "e,e,e,e,e")))]
+ "!memory_operand (operands[0], VOIDmode) || register_operand (operands[1], VOIDmode)"
+ "@
+ v_mov_b32\t%0, %1
+ ds_write_b32\t%A0, %1%O0
+ flat_store%s1\t%A0, %1
+ ds_read_b32\t%0, %A1%O1
+ flat_load%s0\t%0, %A1\n\ts_waitcnt\tlgkmcnt(0),vmcnt(0)"
+ [(set_attr "type" "vop1,dsmem,flat,dsmem,flat")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*mov<mode>"
+ [(set (match_operand:VEC_2REG_MODE 0 "register_operand" "=&v")
+ (vec_merge:VEC_2REG_MODE
+ (match_operand:VEC_2REG_MODE 1 "gcn_alu_operand" "0vB")
+ (match_operand:VEC_2REG_MODE 3 "gcn_register_or_unspec_operand" "0U")
+ (match_operand:DI 2 "gcn_exec_reg_operand" "e")))]
+ "!memory_operand (operands[0], VOIDmode) || register_operand (operands[1], VOIDmode)"
+ "v_mov_b32\t%L0, %L1\n\tv_mov_b32\t%H0, %H1"
+ [(set_attr "type" "vop1")
+ (set_attr "mode" "<MODE>")])
+
+; TODO: Add zero/sign extending variants.
+
+;; -------------------------------------------------------------------------
+;; Vector lane moves
+;; -------------------------------------------------------------------------
+
+; v_writelane/v_readlane work regardless of the EXEC flags.
+; We allow the source to be scratch.
+(define_insn "*vec_set<mode>"
+ [(set (match_operand:VEC_1REG_MODE 0 "register_operand" "=v")
+ (vec_merge:VEC_1REG_MODE
+ (vec_duplicate:VEC_1REG_MODE
+ (match_operand:<scalar_mode> 1 "register_operand" "SS"))
+ (match_operand:VEC_1REG_MODE 3 "gcn_register_or_unspec_operand" "0U")
+ (ashift (const_int 1) (match_operand:SI 2 "gcn_alu_operand" "SSB"))))]
+ ""
+ "v_writelane_b32 %0, %1, %2"
+ [(set_attr "type" "vop3a")
+ (set_attr "mode" "<scalar_mode>")
+ (set_attr "exec" "any")])
+
+; FIXME: 64-bit operations really should be splitters, but I am not sure
+; how to represent vertical subregs.
+(define_insn "*vec_set<mode>"
+ [(set (match_operand:VEC_2REG_MODE 0 "register_operand" "=v")
+ (vec_merge:VEC_2REG_MODE
+ (vec_duplicate:VEC_2REG_MODE
+ (match_operand:<scalar_mode> 1 "register_operand" "SS"))
+ (match_operand:VEC_2REG_MODE 3 "gcn_register_or_unspec_operand" "0U")
+ (ashift (const_int 1) (match_operand:SI 2 "gcn_alu_operand" "SSB"))))]
+ ""
+ "v_writelane_b32 %L0, %L1, %2\n\tv_writelane_b32 %H0, %H1, %2"
+ [(set_attr "type" "vop3a")
+ (set_attr "mode" "<scalar_mode>")
+ (set_attr "exec" "any")])
+
+(define_expand "vec_set<mode>"
+ [(set (match_operand:VEC_1REG_MODE 0 "register_operand" "")
+ (vec_merge:VEC_1REG_MODE
+ (vec_duplicate:VEC_1REG_MODE
+ (match_operand:<scalar_mode> 1 "register_operand" ""))
+ (match_dup 0)
+ (ashift (const_int 1) (match_operand:SI 2 "gcn_alu_operand" ""))))]
+ "")
+
+(define_insn "*vec_set<mode>_1"
+ [(set (match_operand:VEC_1REG_MODE 0 "register_operand" "")
+ (vec_merge:VEC_1REG_MODE
+ (vec_duplicate:VEC_1REG_MODE
+ (match_operand:<scalar_mode> 1 "register_operand" ""))
+ (match_operand:VEC_1REG_MODE 3 "gcn_register_or_unspec_operand" "")
+ (match_operand:SI 2 "const_int_operand")))]
+ "((unsigned) exact_log2 (INTVAL (operands[2])) < 64)"
+{
+ operands[2] = GEN_INT (exact_log2 (INTVAL (operands[2])));
+ return "v_writelane_b32 %0, %1, %2";
+}
+ [(set_attr "type" "vop3a")
+ (set_attr "mode" "<scalar_mode>")
+ (set_attr "exec" "any")])
+
+(define_insn "*vec_set<mode>_1"
+ [(set (match_operand:VEC_2REG_MODE 0 "register_operand" "")
+ (vec_merge:VEC_2REG_MODE
+ (vec_duplicate:VEC_2REG_MODE
+ (match_operand:<scalar_mode> 1 "register_operand" ""))
+ (match_operand:VEC_2REG_MODE 3 "gcn_register_or_unspec_operand" "")
+ (match_operand:SI 2 "const_int_operand")))]
+ "((unsigned) exact_log2 (INTVAL (operands[2])) < 64)"
+{
+ operands[2] = GEN_INT (exact_log2 (INTVAL (operands[2])));
+ return "v_writelane_b32 %L0, %L1, %2\nv_writelane_b32 %H0, %H1, %2";
+}
+ [(set_attr "type" "vop3a")
+ (set_attr "mode" "<scalar_mode>")
+ (set_attr "exec" "any")])
+
+(define_insn "vec_duplicate<mode>"
+ [(set (match_operand:VEC_1REG_MODE 0 "register_operand" "=v")
+ (vec_merge:VEC_1REG_MODE
+ (vec_duplicate:VEC_1REG_MODE
+ (match_operand:<scalar_mode> 1 "gcn_alu_operand" "SSB"))
+ (match_operand:VEC_1REG_MODE 3 "gcn_register_or_unspec_operand" "0U")
+ (match_operand:DI 2 "gcn_exec_reg_operand" "e")))]
+ ""
+ "v_mov_b32\t%0, %1"
+ [(set_attr "type" "vop3a")
+ (set_attr "mode" "<scalar_mode>")
+ (set_attr "exec" "any")])
+
+(define_insn "vec_duplicate<mode>"
+ [(set (match_operand:VEC_2REG_MODE 0 "register_operand" "=v")
+ (vec_merge:VEC_2REG_MODE
+ (vec_duplicate:VEC_2REG_MODE
+ (match_operand:<scalar_mode> 1 "register_operand" "SS"))
+ (match_operand:VEC_2REG_MODE 3 "gcn_register_or_unspec_operand" "0U")
+ (match_operand:DI 2 "gcn_exec_reg_operand" "e")))]
+ ""
+ "v_mov_b32\t%L0, %L1\n\tv_mov_b32\t%H0, %H1"
+ [(set_attr "type" "vop3a")
+ (set_attr "mode" "<scalar_mode>")
+ (set_attr "exec" "any")])
+
+(define_insn "vec_extract<mode>"
+ [(set (match_operand:<scalar_mode> 0 "register_operand" "=Sm")
+ (vec_select:<scalar_mode>
+ (match_operand:VEC_1REG_MODE 1 "register_operand" "v")
+ (parallel [(match_operand:SI 2 "gcn_alu_operand" "SSB")])))]
+ ""
+ "v_readlane_b32 %0, %1, %2"
+ [(set_attr "type" "vop3a")
+ (set_attr "mode" "<scalar_mode>")
+ (set_attr "exec" "any")])
+
+(define_insn "vec_extract<mode>"
+ [(set (match_operand:<scalar_mode> 0 "register_operand" "=Sm")
+ (vec_select:<scalar_mode>
+ (match_operand:VEC_2REG_MODE 1 "register_operand" "v")
+ (parallel [(match_operand:SI 2 "gcn_alu_operand" "SSB")])))]
+ ""
+ "v_readlane_b32 %L0, %L1, %L2
+ v_readlane_b32 %H0, %H1, %H2"
+ [(set_attr "type" "vop3a")
+ (set_attr "mode" "<scalar_mode>")
+ (set_attr "exec" "any")])
+
+(define_expand "vec_init<mode>"
+ [(match_operand:VEC_REG_MODE 0 "register_operand")
+ (match_operand 1)]
+ ""
+{
+ gcn_expand_vector_init (operands[0], operands[1]);
+ DONE;
+})
+
+;; -------------------------------------------------------------------------
+;; Generic expanders for vector operations.
+;; -------------------------------------------------------------------------
+
+(define_mode_iterator V64SIDI [V64SI V64DI])
+
+; Integer operations that produce condition code
+(define_expand "<expander><mode>3"
+ [(parallel [
+ (set (match_operand:V64SIDI 0 "register_operand" "")
+ (vec_merge:V64SIDI
+ (plus_minus:V64SIDI
+ (match_operand:V64SIDI 1 "register_operand" "")
+ (match_operand:V64SIDI 2 "gcn_alu_operand" ""))
+ (match_dup 4)
+ (match_dup 3)))
+ (clobber (reg:DI VCC_REG))])]
+ ""
+{
+ operands[3] = gcn_full_exec_reg ();
+ operands[4] = gcn_gen_undef (<MODE>mode);
+})
+
+(define_mode_iterator VEC_INT_MODE [V64QI V64HI V64SI V64DI])
+(define_code_iterator bitop [and ior xor])
+
+(define_expand "<expander><mode>3"
+ [(set (match_operand:VEC_INT_MODE 0 "gcn_valu_dst_operand" "")
+ (vec_merge:VEC_INT_MODE
+ (bitop:VEC_INT_MODE
+ (match_operand:VEC_INT_MODE 1 "gcn_valu_src0_operand" "")
+ (match_operand:VEC_INT_MODE 2 "gcn_valu_src1com_operand" ""))
+ (match_dup 4)
+ (match_dup 3)))]
+ ""
+{
+ operands[3] = gcn_full_exec_reg ();
+ operands[4] = gcn_gen_undef (<MODE>mode);
+})
+
+(define_code_iterator shiftop [ashift lshiftrt ashiftrt])
+(define_expand "<expander><mode>3"
+ [(set (match_operand:VEC_INT_MODE 0 "register_operand" "")
+ (vec_merge:VEC_INT_MODE
+ (shiftop:VEC_INT_MODE
+ (match_operand:VEC_INT_MODE 1 "register_operand" "")
+ (match_operand:V64SI 2 "gcn_alu_operand" ""))
+ (match_dup 4)
+ (match_dup 3)))]
+ ""
+{
+ operands[3] = gcn_full_exec_reg ();
+ operands[4] = gcn_gen_undef (<MODE>mode);
+})
+
+;; -------------------------------------------------------------------------
+;; ALU special cases: Plus
+;; -------------------------------------------------------------------------
+
+
+; Turn vector pattern into scalar variant.
+
+(define_subst "vec_to_scalar"
+ [(set (match_operand:VEC_REG_MODE 0)
+ (vec_merge:VEC_REG_MODE
+ (match_operator:VEC_REG_MODE 1 ""
+ [(match_operand:VEC_REG_MODE 2)
+ (match_operand:VEC_REG_MODE 3)])
+ (match_operand:VEC_REG_MODE 4)
+ (match_operand:DI 5)))
+ (clobber (reg:DI VCC_REG))]
+ ""
+ [(set (match_dup:<scalar_mode> 0)
+ (match_op_dup:<scalar_mode> 1
+ [(match_dup:<scalar_mode> 2) (match_dup:<scalar_mode> 3)]))
+ (use (match_dup:<scalar_mode> 5))
+ (clobber (reg:DI VCC_REG))])
+
+(define_subst "vec_to_scalar"
+ [(set (match_operand:VEC_REG_MODE 0)
+ (vec_merge:VEC_REG_MODE
+ (match_operator:VEC_REG_MODE 1 ""
+ [(match_operand:VEC_REG_MODE 2)
+ (match_operand:VEC_REG_MODE 3)])
+ (match_operand:VEC_REG_MODE 4)
+ (match_operand:DI 5)))]
+ ""
+ [(set (match_dup:<scalar_mode> 0)
+ (match_op_dup:<scalar_mode> 1
+ [(match_dup:<scalar_mode> 2) (match_dup:<scalar_mode> 3)]))
+ (use (match_dup:<scalar_mode> 5))])
+
+(define_subst_attr "vec_suffix"
+ "vec_to_scalar" "vector" "scalar")
+
+(define_insn "addv64si3_<vec_suffix>"
+ [(set (match_operand:V64SI 0 "register_operand" "=v")
+ (vec_merge:V64SI
+ (plus:V64SI
+ (match_operand:V64SI 1 "register_operand" "%v")
+ (match_operand:V64SI 2 "gcn_alu_operand" " vSSB"))
+ (match_operand:V64SI 4 "gcn_register_or_unspec_operand" "0U")
+ (match_operand:DI 3 "gcn_exec_reg_operand" "e")))
+ (clobber (reg:DI VCC_REG))]
+ ""
+ "v_add_u32\t%0, vcc, %2, %1"
+ [(set_attr "type" "vop2")
+ (set_attr "mode" "V64SI")])
+
+(define_insn "addv64si3_vector_dup"
+ [(set (match_operand:V64SI 0 "register_operand" "=v")
+ (vec_merge:V64SI
+ (plus:V64SI
+ (vec_duplicate:V64SI
+ (match_operand:SI 2 "gcn_alu_operand" "SSB"))
+ (match_operand:V64SI 1 "register_operand" "v"))
+ (match_operand:V64SI 4 "gcn_register_or_unspec_operand" "0U")
+ (match_operand:DI 3 "gcn_exec_reg_operand" "e")))
+ (clobber (reg:DI VCC_REG))]
+ ""
+ "v_add_u32\t%0, vcc, %1, %2"
+ [(set_attr "type" "vop2")
+ (set_attr "mode" "SI")])
+
+(define_insn "addv64si3_vector_vcc"
+ [(set (match_operand:V64SI 0 "register_operand" "=v,v")
+ (vec_merge:V64SI
+ (plus:V64SI
+ (match_operand:V64SI 1 "register_operand" "%v,v")
+ (match_operand:V64SI 2 "gcn_alu_operand" " vSSB,vSSB"))
+ (match_operand:V64SI 4 "gcn_register_or_unspec_operand" "0U,0U")
+ (match_operand:DI 3 "gcn_exec_reg_operand" "e,e")))
+ (set (match_operand:DI 5 "register_operand" "=cV,Sg")
+ (ior:DI (and:DI (ltu:DI (plus:V64SI (match_dup 1)
+ (match_dup 2))
+ (match_dup 1))
+ (match_dup 3))
+ (and:DI (not:DI (match_dup 3))
+ (match_operand:DI 6 "gcn_register_or_unspec_operand" "5U,5U"))))]
+ ""
+ "v_add_u32\t%0, %5, %1, %2"
+ [(set_attr "type" "vop2,vop3b")
+ (set_attr "mode" "SI")])
+
+;; Tom says that he thinks the previous value of VCC is unchanged when
+;; an execution lane is masked out.
+
+(define_insn "addv64si3_vector_vcc_dup"
+ [(set (match_operand:V64SI 0 "register_operand" "=v,v")
+ (vec_merge:V64SI
+ (plus:V64SI
+ (vec_duplicate:V64SI (match_operand:SI 2 "gcn_alu_operand" "SSB,SSB"))
+ (match_operand:V64SI 1 "register_operand" "v,v"))
+ (match_operand:V64SI 4 "gcn_register_or_unspec_operand" "0U,0U")
+ (match_operand:DI 3 "gcn_exec_reg_operand" "e,e")))
+ (set (match_operand:DI 5 "register_operand" "=cV,Sg")
+ (ior:DI (and:DI (ltu:DI (plus:V64SI (vec_duplicate:V64SI (match_dup 2))
+ (match_dup 1))
+ (vec_duplicate:V64SI (match_dup 2)))
+ (match_dup 3))
+ (and:DI (not:DI (match_dup 3))
+ (match_operand:DI 6 "gcn_register_or_unspec_operand" "5U,5U"))))]
+ ""
+ "v_add_u32\t%0, %5, %1, %2"
+ [(set_attr "type" "vop2,vop3b")
+ (set_attr "mode" "SI")])
+
+;; This pattern does not accept an SGPR because a VCC read already counts
+;; as an SGPR use and the number of SGPR operands is limited to 1.
+(define_insn "addcv64si3_vec"
+ [(set (match_operand:V64SI 0 "register_operand" "=v,v")
+ (vec_merge:V64SI (plus:V64SI (plus:V64SI
+ (vec_merge:V64SI
+ (match_operand:V64SI 7 "gcn_vec1_operand" "A,A")
+ (match_operand:V64SI 8 "gcn_vec0_operand" "A,A")
+ (match_operand:DI 5 "register_operand" "cV,cV"))
+ (match_operand:V64SI 1 "gcn_alu_operand" "%vA,vA"))
+ (match_operand:V64SI 2 "gcn_alu_operand" "vB,vB"))
+ (match_operand:V64SI 4 "gcn_register_or_unspec_operand" "0U,0U")
+ (match_operand:DI 3 "gcn_exec_reg_operand" "e,e")))
+ (set (match_operand:DI 6 "register_operand" "=cV,Sg")
+ (ior:DI (and:DI (ior:DI (ltu:DI (plus:V64SI (plus:V64SI
+ (vec_merge:V64SI
+ (match_dup 7)
+ (match_dup 8)
+ (match_dup 5))
+ (match_dup 1))
+ (match_dup 2))
+ (match_dup 2))
+ (ltu:DI (plus:V64SI (vec_merge:V64SI
+ (match_dup 7)
+ (match_dup 8)
+ (match_dup 5))
+ (match_dup 1))
+ (match_dup 1)))
+ (match_dup 3))
+ (and:DI (not:DI (match_dup 3))
+ (match_operand:DI 9 "gcn_register_or_unspec_operand" "6U,6U"))))]
+ ""
+ "v_addc_u32\t%0, %6, %1, %2, vcc"
+ [(set_attr "type" "vop2,vop3b")
+ (set_attr "mode" "SI")])
+
+
+(define_insn "subv64si3_<vec_suffix>"
+ [(set (match_operand:V64SI 0 "register_operand" "=v,v")
+ (vec_merge:V64SI
+ (minus:V64SI
+ (match_operand:V64SI 1 "gcn_alu_operand" "vSSB,v")
+ (match_operand:V64SI 2 "gcn_alu_operand" "v,vSSB"))
+ (match_operand:V64SI 4 "gcn_register_or_unspec_operand" "0U,0U")
+ (match_operand:DI 3 "gcn_exec_reg_operand" "e,e")))
+ (clobber (reg:DI VCC_REG))]
+ "register_operand (operands[1], VOIDmode) || register_operand (operands[2], VOIDmode)"
+ "@
+ v_sub_u32\t%0, vcc, %2, %1
+ v_subrev_u32\t%0, vcc, %2, %1"
+ [(set_attr "type" "vop2")
+ (set_attr "mode" "SI")])
+
+(define_insn "subv64si3_vector_vcc"
+ [(set (match_operand:V64SI 0 "register_operand" "=v,v,v,v")
+ (vec_merge:V64SI
+ (minus:V64SI
+ (match_operand:V64SI 1 "gcn_alu_operand" "vSSB,vSSB,v,v")
+ (match_operand:V64SI 2 "gcn_alu_operand" "v,v,vSSB,vSSB"))
+ (match_operand:V64SI 4 "gcn_register_or_unspec_operand" "0U,0U,0U,0U")
+ (match_operand:DI 3 "gcn_exec_reg_operand" "e,e,e,e")))
+ (set (match_operand:DI 5 "register_operand" "=cV,Sg,cV,Sg")
+ (ior:DI (and:DI (gtu:DI (minus:V64SI (match_dup 1)
+ (match_dup 2))
+ (match_dup 1))
+ (match_dup 3))
+ (and:DI (not:DI (match_dup 3))
+ (match_operand:DI 6 "gcn_register_or_unspec_operand" "5U,5U,5U,5U"))))]
+ "register_operand (operands[1], VOIDmode) || register_operand (operands[2], VOIDmode)"
+ "@
+ v_sub_u32\t%0, %5, %2, %1
+ v_sub_u32\t%0, %5, %2, %1
+ v_subrev_u32\t%0, %5, %2, %1
+ v_subrev_u32\t%0, %5, %2, %1"
+ [(set_attr "type" "vop2,vop3b,vop2,vop3b")
+ (set_attr "mode" "SI")])
+
+;; This pattern does not accept an SGPR because a VCC read already counts
+;; as an SGPR use and the number of SGPR operands is limited to 1.
+(define_insn "subcv64si3_vec"
+ [(set (match_operand:V64SI 0 "register_operand" "=v,v,v,v")
+ (vec_merge:V64SI (minus:V64SI (minus:V64SI
+ (vec_merge:V64SI
+ (match_operand:V64SI 7 "gcn_vec1_operand" "A,A,A,A")
+ (match_operand:V64SI 8 "gcn_vec0_operand" "A,A,A,A")
+ (match_operand:DI 5 "gcn_alu_operand" "cV,cV,cV,cV"))
+ (match_operand:V64SI 1 "gcn_alu_operand" "vA,vA,vB,vB"))
+ (match_operand:V64SI 2 "gcn_alu_operand" "vB,vB,vA,vA"))
+ (match_operand:V64SI 4 "gcn_register_or_unspec_operand" "0U,0U,0U,0U")
+ (match_operand:DI 3 "gcn_exec_reg_operand" "e,e,e,e")))
+ (set (match_operand:DI 6 "register_operand" "=cV,Sg,cV,Sg")
+ (ior:DI (and:DI (ior:DI (gtu:DI (minus:V64SI (minus:V64SI
+ (vec_merge:V64SI
+ (match_dup 7)
+ (match_dup 8)
+ (match_dup 5))
+ (match_dup 1))
+ (match_dup 2))
+ (match_dup 2))
+ (ltu:DI (minus:V64SI (vec_merge:V64SI
+ (match_dup 7)
+ (match_dup 8)
+ (match_dup 5))
+ (match_dup 1))
+ (match_dup 1)))
+ (match_dup 3))
+ (and:DI (not:DI (match_dup 3))
+ (match_operand:DI 9 "gcn_register_or_unspec_operand" "6U,6U,6U,6U"))))]
+ "register_operand (operands[1], VOIDmode) || register_operand (operands[2], VOIDmode)"
+ "@
+ v_sub_u32\t%0, %5, %2, %1
+ v_sub_u32\t%0, %5, %2, %1
+ v_subrev_u32\t%0, %5, %2, %1
+ v_subrev_u32\t%0, %5, %2, %1"
+ [(set_attr "type" "vop2,vop3b,vop2,vop3b")
+ (set_attr "mode" "SI")])
+
+(define_insn_and_split "*addv64di3_vector"
+ [(set (match_operand:V64DI 0 "register_operand" "=v")
+ (vec_merge:V64DI
+ (plus:V64DI
+ (match_operand:V64DI 1 "register_operand" "%v")
+ (match_operand:V64DI 2 "gcn_alu_operand" "vSSB"))
+ (match_operand:V64DI 4 "gcn_register_or_unspec_operand" "0U")
+ (match_operand:DI 3 "gcn_exec_reg_operand" "e")))
+ (clobber (reg:DI VCC_REG))]
+ ""
+ "#"
+ "gcn_can_split_p (V64DImode, operands[0])
+ && gcn_can_split_p (V64DImode, operands[1])
+ && gcn_can_split_p (V64DImode, operands[2])
+ && gcn_can_split_p (V64DImode, operands[4])"
+ [(const_int 0)]
+{
+ rtx vcc = gen_rtx_REG (DImode, VCC_REG);
+ emit_insn (gen_addv64si3_vector_vcc (gcn_operand_part (V64DImode, operands[0], 0),
+ gcn_operand_part (V64DImode, operands[1], 0),
+ gcn_operand_part (V64DImode, operands[2], 0),
+ operands[3],
+ gcn_operand_part (V64DImode, operands[4], 0),
+ vcc,
+ gcn_gen_undef (DImode)));
+ emit_insn (gen_addcv64si3_vec (gcn_operand_part (V64DImode, operands[0], 1),
+ gcn_operand_part (V64DImode, operands[1], 1),
+ gcn_operand_part (V64DImode, operands[2], 1),
+ operands[3],
+ gcn_operand_part (V64DImode, operands[4], 1),
+ vcc, vcc, gcn_vec_constant (V64SImode, 1),
+ gcn_vec_constant (V64SImode, 0),
+ gcn_gen_undef (DImode)));
+ DONE;
+})
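This split is the standard double-word addition: the low 32-bit add records
its carry in VCC (the ltu comparison in addv64si3_vector_vcc), and
addcv64si3_vec folds that carry into the high add.  Per lane it computes, in
plain C (ours):

    #include <stdint.h>

    static uint64_t
    add64 (uint32_t a_lo, uint32_t a_hi, uint32_t b_lo, uint32_t b_hi)
    {
      uint32_t lo = a_lo + b_lo;
      uint32_t carry = lo < a_lo;         /* the ltu carry captured in VCC */
      uint32_t hi = a_hi + b_hi + carry;  /* v_addc_u32 */
      return ((uint64_t) hi << 32) | lo;
    }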
+
+(define_insn_and_split "subv64di3_vec"
+ [(set (match_operand:V64DI 0 "register_operand" "=v,v")
+ (vec_merge:V64DI
+ (minus:V64DI
+ (match_operand:V64DI 1 "gcn_alu_operand" "vSSB,v")
+ (match_operand:V64DI 2 "gcn_alu_operand" "v,vSSB"))
+ (match_operand:V64DI 4 "gcn_register_or_unspec_operand" "0U,0U")
+ (match_operand:DI 3 "gcn_exec_reg_operand" "e,e")))
+ (clobber (reg:DI VCC_REG))]
+ "register_operand (operands[1], VOIDmode) || register_operand (operands[2], VOIDmode)"
+ "#"
+ "gcn_can_split_p (V64DImode, operands[0])
+ && gcn_can_split_p (V64DImode, operands[1])
+ && gcn_can_split_p (V64DImode, operands[2])
+ && gcn_can_split_p (V64DImode, operands[4])"
+ [(const_int 0)]
+{
+ rtx vcc = gen_rtx_REG (DImode, VCC_REG);
+ emit_insn (gen_subv64si3_vector_vcc (gcn_operand_part (V64DImode, operands[0], 0),
+ gcn_operand_part (V64DImode, operands[1], 0),
+ gcn_operand_part (V64DImode, operands[2], 0),
+ operands[3],
+ gcn_operand_part (V64DImode, operands[4], 0),
+ vcc,
+ gcn_gen_undef (DImode)));
+ emit_insn (gen_subcv64si3_vec (gcn_operand_part (V64DImode, operands[0], 1),
+ gcn_operand_part (V64DImode, operands[1], 1),
+ gcn_operand_part (V64DImode, operands[2], 1),
+ operands[3],
+ gcn_operand_part (V64DImode, operands[4], 1),
+ vcc, vcc, gcn_vec_constant (V64SImode, 1),
+ gcn_vec_constant (V64SImode, 0),
+ gcn_gen_undef (DImode)));
+ DONE;
+})
+
+(define_insn_and_split "addv64di3_zext"
+ [(set (match_operand:V64DI 0 "register_operand" "=&v,v")
+ (vec_merge:V64DI
+ (plus:V64DI
+ (zero_extend:V64DI
+ (match_operand:V64SI 1 "gcn_alu_operand" "0vA,0vB"))
+ (match_operand:V64DI 2 "gcn_alu_operand" "0vB,0vA"))
+ (match_operand:V64DI 4 "gcn_register_or_unspec_operand" "0U,0U")
+ (match_operand:DI 3 "gcn_exec_reg_operand" "e,e")))
+ (clobber (reg:DI VCC_REG))]
+ ""
+ "#"
+ "gcn_can_split_p (V64DImode, operands[0])
+ && gcn_can_split_p (V64DImode, operands[2])
+ && gcn_can_split_p (V64DImode, operands[4])"
+ [(const_int 0)]
+{
+ rtx vcc = gen_rtx_REG (DImode, VCC_REG);
+ emit_insn (gen_addv64si3_vector_vcc (gcn_operand_part (V64DImode, operands[0], 0),
+ operands[1],
+ gcn_operand_part (V64DImode, operands[2], 0),
+ operands[3],
+ gcn_operand_part (V64DImode, operands[4], 0),
+ vcc,
+ gcn_gen_undef (DImode)));
+ emit_insn (gen_addcv64si3_vec (gcn_operand_part (V64DImode, operands[0], 1),
+ gcn_operand_part (V64DImode, operands[2], 1),
+ const0_rtx,
+ operands[3],
+ gcn_operand_part (V64DImode, operands[4], 1),
+ vcc, vcc, gcn_vec_constant (V64SImode, 1),
+ gcn_vec_constant (V64SImode, 0),
+ gcn_gen_undef (DImode)));
+ DONE;
+})
+
+(define_insn_and_split "addv64di3_zext_dup"
+ [(set (match_operand:V64DI 0 "register_operand" "=&v")
+ (vec_merge:V64DI
+ (plus:V64DI
+ (zero_extend:V64DI
+ (vec_duplicate:V64SI
+ (match_operand:SI 1 "gcn_alu_operand" "BSS")))
+ (match_operand:V64DI 2 "gcn_alu_operand" "0vA"))
+ (match_operand:V64DI 4 "gcn_register_or_unspec_operand" "0U")
+ (match_operand:DI 3 "gcn_exec_reg_operand" "e")))
+ (clobber (reg:DI VCC_REG))]
+ ""
+ "#"
+ "gcn_can_split_p (V64DImode, operands[0])
+ && gcn_can_split_p (V64DImode, operands[2])
+ && gcn_can_split_p (V64DImode, operands[4])"
+ [(const_int 0)]
+{
+ rtx vcc = gen_rtx_REG (DImode, VCC_REG);
+ emit_insn (gen_addv64si3_vector_vcc_dup (gcn_operand_part (V64DImode, operands[0], 0),
+ gcn_operand_part (DImode, operands[1], 0),
+ gcn_operand_part (V64DImode, operands[2], 0),
+ operands[3],
+ gcn_operand_part (V64DImode, operands[4], 0),
+ vcc,
+ gcn_gen_undef (DImode)));
+ emit_insn (gen_addcv64si3_vec (gcn_operand_part (V64DImode, operands[0], 1),
+ gcn_operand_part (V64DImode, operands[2], 1),
+ const0_rtx, operands[3],
+ gcn_operand_part (V64DImode, operands[4], 1),
+ vcc, vcc, gcn_vec_constant (V64SImode, 1),
+ gcn_vec_constant (V64SImode, 0),
+ gcn_gen_undef (DImode)));
+ DONE;
+})
+
+(define_insn_and_split "addv64di3_zext_dup2"
+ [(set (match_operand:V64DI 0 "register_operand" "=v")
+ (vec_merge:V64DI
+ (plus:V64DI
+ (zero_extend:V64DI
+ (match_operand:V64SI 1 "gcn_alu_operand" "vA"))
+ (vec_duplicate:V64DI
+ (match_operand:DI 2 "gcn_alu_operand" "BSS")))
+ (match_operand:V64DI 4 "gcn_register_or_unspec_operand" "0U")
+ (match_operand:DI 3 "gcn_exec_reg_operand" "e")))
+ (clobber (reg:DI VCC_REG))]
+ ""
+ "#"
+ "gcn_can_split_p (V64DImode, operands[0])
+ && gcn_can_split_p (V64DImode, operands[4])"
+ [(const_int 0)]
+{
+ rtx vcc = gen_rtx_REG (DImode, VCC_REG);
+ emit_insn (gen_addv64si3_vector_vcc_dup (gcn_operand_part (V64DImode, operands[0], 0),
+ operands[1],
+ gcn_operand_part (DImode, operands[2], 0),
+ operands[3],
+ gcn_operand_part (V64DImode, operands[4], 0),
+ vcc,
+ gcn_gen_undef (DImode)));
+ rtx dsthi = gcn_operand_part (V64DImode, operands[0], 1);
+ emit_insn (gen_vec_duplicatev64si (dsthi, gcn_operand_part (DImode, operands[2], 1),
+ operands[3],
+ gcn_gen_undef (V64SImode)));
+ emit_insn (gen_addcv64si3_vec (dsthi, dsthi, const0_rtx, operands[3],
+ gcn_operand_part (V64DImode, operands[4], 1),
+ vcc, vcc, gcn_vec_constant (V64SImode, 1),
+ gcn_vec_constant (V64SImode, 0),
+ gcn_gen_undef (DImode)));
+ DONE;
+})
+
+(define_mode_iterator DS_ARITH_MODE [V64SI V64SF V64DI])
+
+(define_insn "add<mode>3_ds_<vec_suffix>"
+ [(set (match_operand:DS_ARITH_MODE 0 "gcn_ds_memory_operand" "=RD")
+ (vec_merge:DS_ARITH_MODE
+ (plus:DS_ARITH_MODE
+ (match_operand:DS_ARITH_MODE 1 "gcn_ds_memory_operand" "%RD")
+ (match_operand:DS_ARITH_MODE 2 "register_operand" "v"))
+ (match_operand:DS_ARITH_MODE 4 "gcn_register_ds_or_unspec_operand" "0U")
+ (match_operand:DI 3 "gcn_exec_reg_operand" "e")))]
+ "rtx_equal_p (operands[0], operands[1])"
+ "ds_add%u0\t%A0, %2%O0"
+ [(set_attr "type" "vop2")
+ (set_attr "mode" "SI")])
+
+(define_insn "sub<mode>3_ds_<vec_suffix>"
+ [(set (match_operand:DS_ARITH_MODE 0 "gcn_ds_memory_operand" "=RD")
+ (vec_merge:DS_ARITH_MODE
+ (minus:DS_ARITH_MODE
+ (match_operand:DS_ARITH_MODE 1 "gcn_ds_memory_operand" "RD")
+ (match_operand:DS_ARITH_MODE 2 "register_operand" "v"))
+ (match_operand:DS_ARITH_MODE 4 "gcn_register_ds_or_unspec_operand" "0U")
+ (match_operand:DI 3 "gcn_exec_reg_operand" "e")))]
+ "rtx_equal_p (operands[0], operands[1])"
+ "ds_sub%u0\t%A0, %2%O0"
+ [(set_attr "type" "vop2")
+ (set_attr "mode" "SI")])
+
+(define_insn "subr<mode>3_ds_<vec_suffix>"
+ [(set (match_operand:DS_ARITH_MODE 0 "gcn_ds_memory_operand" "=RD")
+ (vec_merge:DS_ARITH_MODE
+ (minus:DS_ARITH_MODE
+ (match_operand:DS_ARITH_MODE 2 "register_operand" "v")
+ (match_operand:DS_ARITH_MODE 1 "gcn_ds_memory_operand" "RD"))
+ (match_operand:DS_ARITH_MODE 4 "gcn_register_ds_or_unspec_operand" "0U")
+ (match_operand:DI 3 "gcn_exec_reg_operand" "e")))]
+ "rtx_equal_p (operands[0], operands[1])"
+ "ds_rsub%u0\t%A0, %2%O0"
+ [(set_attr "type" "vop2")
+ (set_attr "mode" "SI")])
+
+;; -------------------------------------------------------------------------
+;; ALU: mult
+;; -------------------------------------------------------------------------
+
+(define_code_iterator any_extend [sign_extend zero_extend])
+(define_code_attr sgnsuffix [(sign_extend "%i") (zero_extend "%u")])
+(define_code_attr s [(sign_extend "s") (zero_extend "u")])
+
+(define_insn "<s>mulsi3_highpart_vector"
+ [(set (match_operand:V64SI 0 "register_operand" "=v")
+ (vec_merge:V64SI
+ (truncate:V64SI
+ (lshiftrt:V64DI
+ (mult:V64DI
+ (any_extend:V64DI
+ (match_operand:V64SI 1 "gcn_alu_operand" "%v"))
+ (any_extend:V64DI
+ (match_operand:V64SI 2 "gcn_alu_operand" "vSSB")))
+ (const_int 32)))
+ (match_operand:V64SI 4 "gcn_register_ds_or_unspec_operand" "0U")
+ (match_operand:DI 3 "gcn_exec_reg_operand" "e")))]
+ ""
+ "v_mul_hi<sgnsuffix>0\t%0, %1, %2"
+ [(set_attr "type" "vop3a")
+ (set_attr "mode" "SI")])
+
+(define_insn "<s>mulsi3_highpart_scalar"
+ [(set (match_operand:SI 0 "register_operand" "=v")
+ (truncate:SI
+ (lshiftrt:DI
+ (mult:DI
+ (any_extend:DI
+ (match_operand:SI 1 "register_operand" "%v"))
+ (any_extend:DI
+ (match_operand:SI 2 "gcn_alu_operand" "vSSB")))
+ (const_int 32))))
+ (use (match_operand:DI 3 "gcn_exec_reg_operand" "e"))]
+ ""
+ "v_mul_hi<sgnsuffix>0\t%0, %1, %2"
+ [(set_attr "type" "vop3a")
+ (set_attr "mode" "SI")])
+
+;; -------------------------------------------------------------------------
+;; ALU: the generic 32-bit case
+;; -------------------------------------------------------------------------
+
+(define_insn "<expander><mode>3_<vec_suffix>"
+ [(set (match_operand:VEC_1REG_INT_MODE 0 "gcn_valu_dst_operand" "=v,RD")
+ (vec_merge:VEC_1REG_INT_MODE
+ (bitop:VEC_1REG_INT_MODE
+ (match_operand:VEC_1REG_INT_MODE 1 "gcn_valu_src0_operand" "%v,0")
+ (match_operand:VEC_1REG_INT_MODE 2 "gcn_valu_src1com_operand" "vSSB,v"))
+ (match_operand:VEC_1REG_INT_MODE 4 "gcn_register_ds_or_unspec_operand" "0U,0U")
+ (match_operand:DI 3 "gcn_exec_reg_operand" "e,e")))]
+ "!memory_operand (operands[0], VOIDmode)
+ || (rtx_equal_p (operands[0], operands[1]) && register_operand (operands[2], VOIDmode))"
+ "@
+ v_<mnemonic>0\t%0, %2, %1
+ ds_<mnemonic>0\t%A0, %2%O0"
+ [(set_attr "type" "vop2,dsmem")
+ (set_attr "mode" "V64SI")])
+
+; We add an earlyclobber just because I am too lazy to work out the order of
+; operations in the output template.  VGPR register pairs are not aligned.
+(define_insn "<expander>v64di3_<vec_suffix>"
+ [(set (match_operand:V64DI 0 "gcn_valu_dst_operand" "=&v,RD")
+ (vec_merge:V64DI
+ (bitop:V64DI
+ (match_operand:V64DI 1 "gcn_valu_src0_operand" "%0v,RD")
+ (match_operand:V64DI 2 "gcn_valu_src1com_operand" "v0SSB,v"))
+ (match_operand:V64DI 4 "gcn_register_ds_or_unspec_operand" "0U,0U")
+ (match_operand:DI 3 "gcn_exec_reg_operand" "e,e")))]
+ "!memory_operand (operands[0], VOIDmode)
+ || (rtx_equal_p (operands[0], operands[1]) && register_operand (operands[2], VOIDmode))"
+ "@
+ v_<mnemonic>0\t%L0, %L2, %L1\n\tv_<mnemonic>0\t%H0, %H2, %H1
+ ds_<mnemonic>0\t%A0, %2%O0"
+ [(set_attr "type" "vop2,dsmem")
+ (set_attr "mode" "V64SI,V64DI")])
+
+(define_insn "<expander><mode>3_<vec_suffix>"
+ [(set (match_operand:V_INT_MODE 0 "register_operand" "=v")
+ (vec_merge:V_INT_MODE
+ (shiftop:V_INT_MODE
+ (match_operand:V_INT_MODE 1 "gcn_alu_operand" "v")
+ (match_operand:V64SI 2 "gcn_alu_operand" "vSSB"))
+ (match_operand:V_INT_MODE 4 "gcn_register_or_unspec_operand" "0U")
+ (match_operand:DI 3 "gcn_exec_reg_operand" "e")))]
+ ""
+ "v_<revmnemonic>0\t%0, %2, %1"
+ [(set_attr "type" "vop2")
+ (set_attr "mode" "<MODE>")])
+
+;; -------------------------------------------------------------------------
+;; Generic FP binary operations
+;; -------------------------------------------------------------------------
+
+(define_mode_iterator VEC_FP_MODE
+ [V64HF V64SF V64DF])
+(define_mode_iterator FP_MODE
+ [HF SF DF])
+
+(define_code_iterator comm_fp [plus mult smin smax])
+(define_code_iterator nocomm_fp [minus])
+(define_code_iterator all_fp [plus mult minus smin smax])
+
+(define_insn "<expander><mode>_<vec_suffix>"
+ [(set (match_operand:VEC_FP_MODE 0 "register_operand" "=v")
+ (vec_merge:VEC_FP_MODE
+ (comm_fp:VEC_FP_MODE
+ (match_operand:VEC_FP_MODE 1 "register_operand" "%v")
+ (match_operand:VEC_FP_MODE 2 "gcn_alu_operand" " vSSB"))
+ (match_operand:VEC_FP_MODE 4 "gcn_register_or_unspec_operand" "0U")
+ (match_operand:DI 3 "gcn_exec_reg_operand" "e")))]
+ ""
+ "v_<mnemonic>0\t%0, %2, %1"
+ [(set_attr "type" "vop2")
+ (set_attr "mode" "<MODE>")])
+
+
+(define_insn "<expander><mode>_<vec_suffix>"
+ [(set (match_operand:VEC_FP_MODE 0 "register_operand" "=v,v")
+ (vec_merge:VEC_FP_MODE
+ (nocomm_fp:VEC_FP_MODE
+ (match_operand:VEC_FP_MODE 1 "register_operand" "vSSB,v")
+ (match_operand:VEC_FP_MODE 2 "gcn_alu_operand" "v,vSSB"))
+ (match_operand:VEC_FP_MODE 4 "gcn_register_or_unspec_operand" "0U,0U")
+ (match_operand:DI 3 "gcn_exec_reg_operand" "e,e")))]
+ ""
+ "@
+ v_<mnemonic>0\t%0, %2, %1
+ v_<revmnemonic>0\t%0, %2, %1"
+ [(set_attr "type" "vop2")
+ (set_attr "mode" "<MODE>")])
+
+(define_expand "<expander><mode>3"
+ [(set (match_operand:VEC_FP_MODE 0 "gcn_valu_dst_operand" "")
+ (vec_merge:VEC_FP_MODE
+ (all_fp:VEC_FP_MODE
+ (match_operand:VEC_FP_MODE 1 "gcn_valu_src0_operand" "")
+ (match_operand:VEC_FP_MODE 2 "gcn_valu_src1_operand" ""))
+ (match_dup 4)
+ (match_dup 3)))]
+ ""
+{
+ operands[3] = gcn_full_exec_reg ();
+ operands[4] = gcn_gen_undef (<MODE>mode);
+})
+
+(define_expand "<expander><mode>3"
+ [(parallel [(set (match_operand:FP_MODE 0 "gcn_valu_dst_operand" "")
+ (all_fp:FP_MODE
+ (match_operand:FP_MODE 1 "gcn_valu_src0_operand" "")
+ (match_operand:FP_MODE 2 "gcn_valu_src1_operand" "")))
+ (use (match_dup 3))])]
+ ""
+{
+ operands[3] = gcn_scalar_exec ();
+})
diff --git a/gcc/config/gcn/gcn.c b/gcc/config/gcn/gcn.c
new file mode 100644
index 00000000000..c80bdf5ccc9
--- /dev/null
+++ b/gcc/config/gcn/gcn.c
@@ -0,0 +1,2905 @@
+/* Copyright (C) 2016-2017 Free Software Foundation, Inc.
+
+ This file is free software; you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your option)
+ any later version.
+
+ This file is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "backend.h"
+#include "target.h"
+#include "memmodel.h"
+#include "rtl.h"
+#include "tree.h"
+#include "gimple.h"
+#include "cfghooks.h"
+#include "cfgloop.h"
+#include "df.h"
+#include "tm_p.h"
+#include "stringpool.h"
+#include "expmed.h"
+#include "optabs.h"
+#include "regs.h"
+#include "emit-rtl.h"
+#include "recog.h"
+#include "diagnostic-core.h"
+#include "insn-attr.h"
+#include "alias.h"
+#include "fold-const.h"
+#include "stor-layout.h"
+#include "calls.h"
+#include "varasm.h"
+#include "explow.h"
+#include "expr.h"
+#include "output.h"
+#include "cfgrtl.h"
+#include "cfgbuild.h"
+#include "langhooks.h"
+#include "reload.h"
+#include "sched-int.h"
+#include "params.h"
+#include "gimplify.h"
+#include "tm-constrs.h"
+#include "ddg.h"
+#include "dumpfile.h"
+#include "builtins.h"
+#include "rtl-iter.h"
+#include "cfghooks.h"
+
+/* This file should be included last. */
+#include "target-def.h"
+
+static REAL_VALUE_TYPE dconst4, dconst1over2pi;
+static bool ext_gcn_constants_init = 0;
+
+
+/* Register modes. */
+
+/* Return true if MODE is valid for pair of VGPR registers. */
+
+static inline bool
+vgpr_2reg_mode_p (enum machine_mode mode)
+{
+ return mode == DImode || mode == DFmode || mode == V64DFmode
+ || mode == V64DImode /*|| mode == V64BImode*/;
+}
+
+/* Return true if MODE can be handled directly by VGPR operations. */
+
+static inline bool
+vgpr_vector_mode_p (enum machine_mode mode)
+{
+ return mode == V64HImode || mode == V64SImode || mode == V64DImode
+ || mode == V64HFmode || mode == V64SFmode || mode == V64DFmode;
+}
+
+
+/* Return true if MODE is valid for pair of SGPR registers. */
+
+static inline bool
+sgpr_2reg_mode_p (enum machine_mode mode)
+{
+ return mode == DImode || mode == DFmode /*|| mode == V32BImode*/;
+}
+
+/* Return the number of hard registers needed to hold a value of MODE
+   in register REGNO.  */
+
+int
+gcn_hard_regno_nregs (int regno, enum machine_mode mode)
+{
+  /* Scalar registers are 32-bit; vector registers are in fact tuples of
+     64 lanes.  */
+ if (VGPR_REGNO_P (regno))
+ {
+ if (vgpr_1reg_mode_p (mode))
+ return 1;
+ if (vgpr_2reg_mode_p (mode))
+ return 2;
+ }
+ return CEIL (GET_MODE_SIZE (mode), 4);
+}
+
+/* Register classes. */
+
+/* Implement the TARGET_CLASS_MAX_NREGS hook.
+
+   Return the maximum number of consecutive registers of class RCLASS
+   needed to hold a value of mode MODE.  */
+
+static unsigned char
+gcn_class_max_nregs (reg_class_t rclass, machine_mode mode)
+{
+ if (rclass == VGPR_REGS)
+ {
+ if (vgpr_1reg_mode_p (mode))
+ return 1;
+ if (vgpr_2reg_mode_p (mode))
+ return 2;
+ }
+ return CEIL (GET_MODE_SIZE (mode), 4);
+}
+
+/* Return true if REGNO can hold value in MODE. */
+
+bool
+gcn_hard_regno_mode_ok (int regno, machine_mode mode)
+{
+ switch (regno)
+ {
+ case FLAT_SCRATCH_LO_REG:
+ case XNACK_MASK_LO_REG:
+ case TBA_LO_REG:
+ case TMA_LO_REG:
+ return (mode == SImode || mode == DImode);
+ case VCC_LO_REG:
+ case EXEC_LO_REG:
+      return (mode == CCmode
+ || mode == SImode || mode == DImode
+ /*|| mode == V32BImode || mode == V64BImode*/);
+ case M0_REG:
+ case FLAT_SCRATCH_HI_REG:
+ case XNACK_MASK_HI_REG:
+ case TBA_HI_REG:
+ case TMA_HI_REG:
+ return mode == SImode;
+ case VCC_HI_REG:
+ case EXEC_HI_REG:
+ return mode == SImode /*|| mode == V32BImode*/;
+ case SCC_REG:
+ case VCCZ_REG:
+ case EXECZ_REG:
+ return mode == BImode;
+ }
+ /* FIXME: We restrict double register values to aligned registers. */
+ if (SGPR_REGNO_P (regno))
+ return (sgpr_1reg_mode_p (mode)
+	    || (!((regno - FIRST_SGPR_REG) & 1) && sgpr_2reg_mode_p (mode)));
+ if (VGPR_REGNO_P (regno))
+ return (vgpr_1reg_mode_p (mode) || vgpr_2reg_mode_p (mode));
+ return false;
+}
+
+/* Return smallest class containing REGNO. */
+
+enum reg_class
+gcn_regno_reg_class (int regno)
+{
+ switch (regno)
+ {
+ case SCC_REG:
+ return SCC_CONDITIONAL_REG;
+ case VCCZ_REG:
+ return VCCZ_CONDITIONAL_REG;
+ case EXECZ_REG:
+ return EXECZ_CONDITIONAL_REG;
+ case EXEC_LO_REG:
+ case EXEC_HI_REG:
+ return EXEC_MASK_REG;
+ }
+ if (VGPR_REGNO_P (regno))
+ return VGPR_REGS;
+ if (SGPR_REGNO_P (regno))
+ return SGPR_REGS;
+ if (regno < FIRST_VGPR_REG)
+ return GENERAL_REGS;
+ return ALL_REGS;
+}
+
+/* GCC assumes that the lowpart contains the first part of the value as
+   stored in memory.  This is not the case for vector registers.  */
+
+bool gcn_cannot_change_mode_class (machine_mode from, machine_mode to,
+ int regclass)
+{
+ if (!vgpr_vector_mode_p (from) && !vgpr_vector_mode_p (to))
+ return false;
+ return gcn_class_max_nregs (regclass, from) != gcn_class_max_nregs (regclass, to);
+}
+
+static void
+print_reg (FILE *file, rtx x)
+{
+ enum machine_mode mode = GET_MODE (x);
+  if (mode == SImode || mode == BImode || mode == V64SImode
+      || mode == HFmode || mode == SFmode || mode == V64SFmode)
+ fprintf (file, "%s", reg_names[REGNO (x)]);
+ else if (mode == DImode || mode == V64DImode
+ || mode == DFmode || mode == V64DFmode)
+ {
+ if (SGPR_REGNO_P (REGNO (x)))
+ fprintf (file, "s[%i:%i]", REGNO (x) - FIRST_SGPR_REG,
+ REGNO (x) - FIRST_SGPR_REG + 1);
+ else if (VGPR_REGNO_P (REGNO (x)))
+ fprintf (file, "v[%i:%i]", REGNO (x) - FIRST_VGPR_REG,
+ REGNO (x) - FIRST_VGPR_REG + 1);
+ else if (REGNO (x) == FLAT_SCRATCH_REG)
+ fprintf (file, "flat_scratch");
+ else if (REGNO (x) == EXEC_REG)
+ fprintf (file, "exec");
+ else if (REGNO (x) == VCC_LO_REG)
+ fprintf (file, "vcc");
+ else
+ fprintf (file, "[%s:%s]",
+ reg_names [REGNO (x)], reg_names [REGNO (x)+1]);
+ }
+ else if (mode == TImode)
+ {
+ if (SGPR_REGNO_P (REGNO (x)))
+ fprintf (file, "s[%i:%i]", REGNO (x) - FIRST_SGPR_REG,
+ REGNO (x) - FIRST_SGPR_REG + 3);
+ else if (VGPR_REGNO_P (REGNO (x)))
+ fprintf (file, "v[%i:%i]", REGNO (x) - FIRST_VGPR_REG,
+ REGNO (x) - FIRST_VGPR_REG + 3);
+ else
+ gcc_unreachable ();
+ }
+ else
+ gcc_unreachable ();
+}
+
+
+/* Initialize the table of extra GCN mathematical constants.  */
+
+static void
+init_ext_gcn_constants (void)
+{
+ real_from_integer (&dconst4, DFmode, 4, SIGNED);
+
+ /* FIXME: this constant probably does not match what hardware really loads.
+ Reality check it eventually. */
+ real_from_string (&dconst1over2pi, "0.1591549430918953357663423455968866839");
+ real_convert (&dconst1over2pi, SFmode, &dconst1over2pi);
+
+ ext_gcn_constants_init = 1;
+}
+
+/* Return non-zero if X is a constant that can appear as an inline operand.
+   This is 0, 0.5, -0.5, 1, -1, 2, -2, 4, -4, 1/(2*pi),
+   or a vector of those.
+   The value returned is the hardware encoding of the constant.  */
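+
+/* Encodings used below: 128 for 0.0, 240/241 for 0.5/-0.5, 242/243 for
+   1.0/-1.0, 244/245 for 2.0/-2.0, 246/247 for 4.0/-4.0, and 248 for
+   1/(2*pi).  */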
+
+int
+gcn_inline_fp_constant_p (rtx x, bool allow_vector)
+{
+ machine_mode mode = GET_MODE (x);
+
+ if ((mode == V64HFmode || mode == V64SFmode || mode == V64DFmode)
+ && allow_vector)
+ {
+ int n;
+ if (GET_CODE (x) != CONST_VECTOR)
+ return 0;
+ n = gcn_inline_fp_constant_p (CONST_VECTOR_ELT (x, 0), false);
+ if (!n)
+ return 0;
+ for (int i = 1; i < 64; i++)
+ if (CONST_VECTOR_ELT (x, i) != CONST_VECTOR_ELT (x, 0))
+ return 0;
+      return n;
+ }
+
+ if (mode != HFmode && mode != SFmode && mode != DFmode)
+ return 0;
+
+ const REAL_VALUE_TYPE *r;
+
+ if (x == CONST0_RTX (mode))
+ return 128;
+ if (x == CONST1_RTX (mode))
+ return 242;
+
+  r = CONST_DOUBLE_REAL_VALUE (x);
+
+  if (!ext_gcn_constants_init)
+    init_ext_gcn_constants ();
+
+  if (real_identical (r, &dconsthalf))
+    return 240;
+  if (real_identical (r, &dconstm1))
+    return 243;
+  if (real_identical (r, &dconst2))
+    return 244;
+  if (real_identical (r, &dconst4))
+    return 246;
+  if (real_identical (r, &dconst1over2pi))
+    return 248;
+
+  /* real_value_negate returns the negated value rather than modifying
+     its argument, so capture the result for the negative cases.  */
+  REAL_VALUE_TYPE negated = real_value_negate (r);
+  if (real_identical (&negated, &dconsthalf))
+    return 241;
+  if (real_identical (&negated, &dconst2))
+    return 245;
+  if (real_identical (&negated, &dconst4))
+    return 247;
+
+ return 0;
+}
+
+/* Return true if X is a floating-point constant (or a vector of one)
+   that can be used as an operand, either inline or as a 32-bit
+   literal.  */
+
+bool
+gcn_fp_constant_p (rtx x, bool allow_vector)
+{
+ machine_mode mode = GET_MODE (x);
+
+ if ((mode == V64HFmode || mode == V64SFmode || mode == V64DFmode)
+ && allow_vector)
+ {
+ int n;
+ if (GET_CODE (x) != CONST_VECTOR)
+ return false;
+ n = gcn_fp_constant_p (CONST_VECTOR_ELT (x, 0), false);
+ if (!n)
+ return false;
+ for (int i = 1; i < 64; i++)
+ if (CONST_VECTOR_ELT (x, i) != CONST_VECTOR_ELT (x, 0))
+ return false;
+ return true;
+ }
+ if (mode != HFmode && mode != SFmode && mode != DFmode)
+ return false;
+
+ if (gcn_inline_fp_constant_p (x, false))
+ return true;
+  /* FIXME: It is not clear how 32-bit immediates are interpreted here.  */
+ return (mode != DFmode);
+}
+
+/* Return true if X is a constant representable as an inline constant.  */
+
+bool
+gcn_inline_constant_p (rtx x)
+{
+ if (GET_CODE (x) == CONST_INT)
+ return INTVAL (x) >= -16 && INTVAL (x) < 64;
+ if (GET_CODE (x) == CONST_DOUBLE)
+ return gcn_inline_fp_constant_p (x, false);
+ if (GET_CODE (x) == CONST_VECTOR)
+ {
+      if (!vgpr_vector_mode_p (GET_MODE (x)))
+	return false;
+      if (!gcn_inline_constant_p (CONST_VECTOR_ELT (x, 0)))
+	return false;
+      for (int i = 1; i < 64; i++)
+	if (CONST_VECTOR_ELT (x, i) != CONST_VECTOR_ELT (x, 0))
+	  return false;
+      return true;
+ }
+ return false;
+}
+
+/* Return true if X is a constant representable as an operand, either
+   inline or as a literal.  */
+
+bool
+gcn_constant_p (rtx x)
+{
+ if (GET_CODE (x) == CONST_INT)
+ return trunc_int_for_mode (INTVAL (x), SImode) == INTVAL (x);
+ if (GET_CODE (x) == CONST_DOUBLE)
+ return gcn_fp_constant_p (x, false);
+ if (GET_CODE (x) == CONST_VECTOR)
+ {
+      if (!vgpr_vector_mode_p (GET_MODE (x)))
+	return false;
+      if (!gcn_constant_p (CONST_VECTOR_ELT (x, 0)))
+	return false;
+      for (int i = 1; i < 64; i++)
+	if (CONST_VECTOR_ELT (x, i) != CONST_VECTOR_ELT (x, 0))
+	  return false;
+      return true;
+ }
+ return false;
+}
+
+/* GCN defines multiple ways to address memory. */
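+/* Roughly: GCN_ADDR_LOAD and GCN_ADDR_STORE are scalar accesses through
+   a 64-bit SGPR base; GCN_ADDR_FLAT is a vector access with a 64-bit
+   address in each lane; GCN_ADDR_DS is an LDS/GDS data-share access
+   with a 32-bit address in each lane; GCN_ADDR_BUFFERED is a buffer
+   access used for the scratch segment.  */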
+
+enum gcn_address_type
+{
+ GCN_ADDR_LOAD,
+ GCN_ADDR_STORE,
+ GCN_ADDR_FLAT,
+ GCN_ADDR_DS,
+ GCN_ADDR_BUFFERED,
+};
+
+/* Determine the kind of instruction used to access memory in ADDRSPACE.  */
+
+enum gcn_address_type
+gcn_addr_space_type (addr_space_t addrspace)
+{
+ switch (addrspace)
+ {
+ case ADDR_SPACE_SCRATCH:
+ return GCN_ADDR_BUFFERED;
+ case ADDR_SPACE_FLAT:
+ return GCN_ADDR_FLAT;
+ case ADDR_SPACE_SCALAR_FLAT:
+ return GCN_ADDR_LOAD;
+ case ADDR_SPACE_LDS:
+ case ADDR_SPACE_GDS:
+ return GCN_ADDR_DS;
+ default:
+ gcc_unreachable ();
+ }
+}
+
+void
+print_operand_address_1 (FILE *file, rtx addr, addr_space_t as)
+{
+ rtx reg;
+ rtx offset;
+ gcn_address_type type = gcn_addr_space_type (as);
+
+ if (type == GCN_ADDR_BUFFERED)
+ switch (GET_CODE (addr))
+ {
+ case REG:
+ print_reg (file, addr);
+ break;
+
+ case PLUS:
+ reg = XEXP (addr, 0);
+ offset = XEXP (addr, 1);
+ print_reg (file, reg);
+ if (GET_CODE (offset) == CONST_INT)
+ fprintf (file, " offset:" HOST_WIDE_INT_PRINT_DEC,
+ INTVAL (offset));
+ else
+ abort ();
+ break;
+
+ default:
+ debug_rtx (addr);
+ abort ();
+ }
+ else if (type == GCN_ADDR_FLAT)
+ {
+ gcc_assert (GET_CODE (addr) == REG);
+ print_reg (file, addr);
+ }
+ else if (type == GCN_ADDR_DS)
+ switch (GET_CODE (addr))
+ {
+ case REG:
+ print_reg (file, addr);
+ break;
+
+ case PLUS:
+ reg = XEXP (addr, 0);
+ print_reg (file, reg);
+ break;
+
+ default:
+ debug_rtx (addr);
+ abort ();
+ }
+ else
+ switch (GET_CODE (addr))
+ {
+ case REG:
+ print_reg (file, addr);
+ fprintf (file, ", 0");
+ break;
+
+ case PLUS:
+ reg = XEXP (addr, 0);
+ offset = XEXP (addr, 1);
+ print_reg (file, reg);
+ fprintf (file, ", ");
+ if (GET_CODE (offset) == REG)
+	print_reg (file, offset);
+ else if (GET_CODE (offset) == CONST_INT)
+ fprintf (file, HOST_WIDE_INT_PRINT_DEC,
+ INTVAL (offset));
+ else
+ abort ();
+ break;
+
+ default:
+ debug_rtx (addr);
+ abort ();
+ }
+}
+
+void
+print_operand_address (FILE *file, rtx addr)
+{
+ print_operand_address_1 (file, addr, ADDR_SPACE_FLAT);
+}
+
+/* ASM printing.
+   b - print operand size as an untyped suffix (b16/b32/b64)
+   i - print operand size as a signed integer suffix (i16/i32/i64),
+       or as a float suffix (f16/f32/f64) for floating-point modes
+   u - print operand size as an unsigned integer suffix (u16/u32/u64)
+   s - print operand size as a memory access size
+       (byte/short/dword/dwordx2/dwordx3/dwordx4)
+   C - print conditional code for s_cbranch (_sccz/_sccnz/_vccz/_vccnz...)
+   D - print conditional code for s_cmp (eq_u64/lg_u64...)
+   E - print conditional code for v_cmp (eq_u64/ne_u64...)
+   A - print address in a format suitable for the given address space
+   O - print offset:n for data share operations
+   L - print the low part of a multi-register value
+   H - print the high part of a multi-register value  */
+
+void
+print_operand (FILE * file, rtx x, int code)
+{
+ int xcode = GET_CODE (x);
+ switch (code)
+ {
+    /* Instructions have the following suffixes.
+       If there are two suffixes, the first is the destination type,
+       the second is the source type.
+
+       B32 Bitfield (untyped data) 32-bit
+       B64 Bitfield (untyped data) 64-bit
+       F16 floating-point 16-bit
+       F32 floating-point 32-bit (IEEE 754 single-precision float)
+       F64 floating-point 64-bit (IEEE 754 double-precision float)
+       I16 signed 16-bit integer
+       I32 signed 32-bit integer
+       I64 signed 64-bit integer
+       U16 unsigned 16-bit integer
+       U32 unsigned 32-bit integer
+       U64 unsigned 64-bit integer */
+
+    /* Print operand size as an untyped suffix.  */
+ case 'b':
+ {
+ const char *s="";
+ enum machine_mode mode = GET_MODE (x);
+ if (VECTOR_MODE_P (mode))
+ mode = GET_MODE_INNER (mode);
+ switch (GET_MODE_SIZE (mode))
+ {
+ case 2:
+ s="_b16";
+ break;
+ case 4:
+ s="_b32";
+ break;
+ case 8:
+ s="_b64";
+ break;
+ default:
+ output_operand_lossage ("invalid operand %%xn code");
+ return;
+ }
+ fputs (s, file);
+ }
+ return;
+ case 'i':
+ {
+ const char *s="";
+ enum machine_mode mode = GET_MODE (x);
+ if (VECTOR_MODE_P (mode))
+ mode = GET_MODE_INNER (mode);
+ if (FLOAT_MODE_P (mode))
+ switch (GET_MODE_SIZE (mode))
+ {
+ case 2:
+ s="_f16";
+ break;
+ case 4:
+ s="_f32";
+ break;
+ case 8:
+ s="_f64";
+ break;
+ default:
+ output_operand_lossage ("invalid operand %%xn code");
+ return;
+ }
+ else
+ switch (GET_MODE_SIZE (mode))
+ {
+ case 2:
+ s="_i16";
+ break;
+ case 4:
+ s="_i32";
+ break;
+ case 8:
+ s="_i64";
+ break;
+ default:
+ output_operand_lossage ("invalid operand %%xn code");
+ return;
+ }
+ fputs (s, file);
+ }
+ return;
+ case 'u':
+ {
+ const char *s="";
+ enum machine_mode mode = GET_MODE (x);
+ if (VECTOR_MODE_P (mode))
+ mode = GET_MODE_INNER (mode);
+ switch (GET_MODE_SIZE (mode))
+ {
+ case 2:
+ s="_u16";
+ break;
+ case 4:
+ s="_u32";
+ break;
+ case 8:
+ s="_u64";
+ break;
+ default:
+ output_operand_lossage ("invalid operand %%xn code");
+ return;
+ }
+ fputs (s, file);
+ }
+ return;
+    /* Print operand size as a memory access size suffix.  */
+ case 's':
+ {
+ const char *s="";
+ switch (GET_MODE_SIZE (GET_MODE (x)))
+ {
+ case 1:
+ s="_byte";
+ break;
+ case 2:
+ s="_short";
+ break;
+ case 4:
+ s="_dword";
+ break;
+ case 8:
+ s="_dwordx2";
+ break;
+ case 12:
+ s="_dwordx3";
+ break;
+ case 16:
+ s="_dwordx4";
+ break;
+ case 32:
+ s="_dwordx8";
+ break;
+ case 64:
+ s="_dwordx16";
+ break;
+ case 256:
+ s="_dword";
+ break;
+ default:
+ output_operand_lossage ("invalid operand %%xn code");
+ return;
+ }
+ fputs (s, file);
+ }
+ return;
+ case 'A':
+ if (xcode != MEM)
+ {
+ output_operand_lossage ("invalid %%xn code");
+ return;
+ }
+ print_operand_address_1 (file, XEXP (x, 0), MEM_ADDR_SPACE (x));
+ return;
+ case 'O':
+ {
+ if (xcode != MEM)
+ {
+ output_operand_lossage ("invalid %%xn code");
+ return;
+ }
+	if (MEM_ADDR_SPACE (x) == ADDR_SPACE_GDS)
+ fprintf (file, " gds");
+ if (GET_CODE (XEXP (x, 0)) == REG)
+ return;
+ if (GET_CODE (XEXP (x, 0)) != PLUS)
+ {
+ output_operand_lossage ("invalid %%xn code");
+ return;
+ }
+	rtx val = XEXP (XEXP (x, 0), 1);
+ if (GET_CODE (val) == CONST_VECTOR)
+ val = CONST_VECTOR_ELT (val, 0);
+ if (GET_CODE (val) != CONST_INT)
+ {
+ output_operand_lossage ("invalid %%xn code");
+ return;
+ }
+ fprintf (file, " offset:" HOST_WIDE_INT_PRINT_DEC,
+ INTVAL (val));
+
+ }
+ return;
+ case 'C':
+ {
+ const char *s;
+ bool num = false;
+ if ((xcode != EQ && xcode != NE)
+ || !REG_P (XEXP (x, 0)))
+ {
+ output_operand_lossage ("invalid %%xn code");
+ return;
+ }
+ switch (REGNO (XEXP (x, 0)))
+ {
+ case VCCZ_REG:
+ s="_vcc";
+ break;
+ case SCC_REG:
+	    /* For some reason the LLVM assembler insists on scc0 instead
+	       of sccz.  */
+ num = true;
+ s="_scc";
+ break;
+ case EXECZ_REG:
+ s="_exec";
+ break;
+ default:
+ output_operand_lossage ("invalid %%xn code");
+ return;
+ }
+ fputs (s, file);
+ if (xcode == EQ)
+ fputc (num ? '0' : 'z', file);
+ else
+ fputs (num ? "1" : "nz", file);
+ return;
+ }
+ case 'D':
+ {
+ const char *s;
+ bool cmp_signed = false;
+ switch (xcode)
+ {
+ case EQ:
+ s="_eq_";
+ break;
+ case NE:
+ s="_lg_";
+ break;
+ case LT:
+ s="_lt_";
+ cmp_signed = true;
+ break;
+ case LE:
+ s="_le_";
+ cmp_signed = true;
+ break;
+ case GT:
+ s="_gt_";
+ cmp_signed = true;
+ break;
+ case GE:
+ s="_ge_";
+ cmp_signed = true;
+ break;
+ case LTU:
+ s="_lt_";
+ break;
+ case LEU:
+ s="_le_";
+ break;
+ case GTU:
+ s="_gt_";
+ break;
+ case GEU:
+ s="_ge_";
+ break;
+ default:
+ output_operand_lossage ("invalid %%xn code");
+ return;
+ }
+ fputs (s, file);
+ fputc (cmp_signed ? 'i' : 'u', file);
+
+ enum machine_mode mode = GET_MODE (XEXP (x, 0));
+ if (mode == VOIDmode)
+ mode = GET_MODE (XEXP (x, 1));
+ switch (GET_MODE_SIZE (mode))
+ {
+ case 4:
+ s="32";
+ break;
+ case 8:
+ s="64";
+ break;
+ default:
+ output_operand_lossage ("invalid operand %%xn code");
+ return;
+ }
+ fputs (s, file);
+ return;
+ }
+ case 'E':
+ {
+ const char *s;
+ bool cmp_signed = false;
+ switch (xcode)
+ {
+ case EQ:
+ s="_eq_";
+ break;
+ case NE:
+ s="_ne_";
+ break;
+ case LT:
+ s="_lt_";
+ cmp_signed = true;
+ break;
+ case LE:
+ s="_le_";
+ cmp_signed = true;
+ break;
+ case GT:
+ s="_gt_";
+ cmp_signed = true;
+ break;
+ case GE:
+ s="_ge_";
+ cmp_signed = true;
+ break;
+ case LTU:
+ s="_lt_";
+ break;
+ case LEU:
+ s="_le_";
+ break;
+ case GTU:
+ s="_gt_";
+ break;
+ case GEU:
+ s="_ge_";
+ break;
+ default:
+ output_operand_lossage ("invalid %%xn code");
+ return;
+ }
+ fputs (s, file);
+ fputc (cmp_signed ? 'i' : 'u', file);
+
+ enum machine_mode mode = GET_MODE (XEXP (x, 0));
+ if (mode == VOIDmode)
+ mode = GET_MODE (XEXP (x, 1));
+ switch (GET_MODE_SIZE (mode))
+ {
+ case 4:
+ s="32";
+ break;
+ case 8:
+ s="64";
+ break;
+ default:
+ output_operand_lossage ("invalid operand %%xn code");
+ return;
+ }
+ fputs (s, file);
+ return;
+ }
+ case 'L':
+ print_operand (file, gcn_operand_part (GET_MODE (x), x, 0), 0);
+ return;
+ case 'H':
+ print_operand (file, gcn_operand_part (GET_MODE (x), x, 1), 0);
+ return;
+ case 0:
+ if (xcode == REG)
+ print_reg (file, x);
+ else if (xcode == MEM)
+ output_address (GET_MODE (x), XEXP (x, 0));
+ else if (xcode == CONST_INT)
+ fprintf (file, "%i", (int)INTVAL (x));
+ else if (xcode == CONST_VECTOR)
+ print_operand (file, CONST_VECTOR_ELT (x, 0), code);
+ else if (xcode == CONST_DOUBLE)
+ {
+ const char *str;
+ switch (gcn_inline_fp_constant_p (x, false))
+ {
+ case 240:
+ str = "0.5";
+ break;
+ case 241:
+ str = "-0.5";
+ break;
+ case 242:
+ str = "1.0";
+ break;
+ case 243:
+ str = "-1.0";
+ break;
+ case 244:
+ str = "2.0";
+ break;
+ case 245:
+ str = "-2.0";
+ break;
+ case 246:
+ str = "4.0";
+ break;
+ case 247:
+ str = "-4.0";
+ break;
+ case 248:
+ str = "1/pi";
+ break;
+ default:
+ rtx ix = simplify_gen_subreg (GET_MODE (x) == DFmode
+ ? DImode : SImode,
+ x, GET_MODE (x), 0);
+		if (ix)
+		  print_operand (file, ix, code);
+		else
+		  output_operand_lossage ("invalid fp constant");
+		return;
+	      }
+	    fputs (str, file);
+ return;
+ }
+ else
+ output_addr_const (file, x);
+ return;
+ default:
+ output_operand_lossage ("invalid %%xn code");
+ }
+ gcc_unreachable ();
+}
+
+
+/* Addressing. */
+
+/* Return true if X is a CONST_VECTOR with all elements identical.  */
+
+static bool
+single_cst_vector_p (rtx x)
+{
+ if (GET_CODE (x) != CONST_VECTOR)
+ return false;
+ for (int i = 1; i < 64; i++)
+ if (CONST_VECTOR_ELT (x, i) != CONST_VECTOR_ELT (x, 0))
+ return false;
+ return true;
+}
+
+/* Recognize RTL expressions that are valid memory addresses for an
+   instruction.  The MODE argument is the machine mode for the MEM
+   expression that wants to use this address.
+
+   Only addresses in canonical form are recognized.  LEGITIMIZE_ADDRESS
+   should convert common non-canonical forms to canonical form so that
+   they will be recognized.  */
+
+static bool
+gcn_legitimate_address_p (machine_mode mode, rtx x, bool,
+ enum gcn_address_type type)
+{
+ enum machine_mode addr_mode;
+ switch (type)
+ {
+ case GCN_ADDR_LOAD:
+ case GCN_ADDR_STORE:
+ addr_mode = DImode;
+ break;
+ case GCN_ADDR_BUFFERED:
+ addr_mode = SImode;
+ break;
+ case GCN_ADDR_FLAT:
+ if (vgpr_vector_mode_p (mode))
+ addr_mode = V64DImode;
+ else
+ addr_mode = DImode;
+ break;
+ case GCN_ADDR_DS:
+ if (vgpr_vector_mode_p (mode))
+ addr_mode = V64SImode;
+ else
+ addr_mode = SImode;
+ break;
+ }
+ if (GET_MODE (x) != addr_mode)
+ return false;
+ if (type == GCN_ADDR_LOAD || type == GCN_ADDR_STORE)
+ switch (GET_CODE (x))
+ {
+ case REG:
+ return (REGNO (x) >= FIRST_PSEUDO_REGISTER
+ || gcn_sgpr_register_operand (x, DImode));
+    /* Addresses are in the form BASE+OFFSET.
+	 OFFSET is either a 20-bit unsigned immediate, an SGPR, or M0.
+	 Writes and atomics do not accept SGPR.  */
+ case PLUS:
+ {
+ rtx x0 = XEXP (x, 0);
+ rtx x1 = XEXP (x, 1);
+ if (GET_CODE (x0) != REG
+	      || (REGNO (x0) < FIRST_PSEUDO_REGISTER
+ && !gcn_sgpr_register_operand (x0, DImode)))
+ return false;
+ if (GET_CODE (x1) == REG)
+ {
+	      if (REGNO (x1) == M0_REG
+		  /* FIXME: maybe this is about the parameter of a store.  */
+		  && (type == GCN_ADDR_STORE
+		      || (REGNO (x1) < FIRST_PSEUDO_REGISTER
+			  && !gcn_sgpr_register_operand (x1, DImode))))
+ return false;
+ }
+ else if (GET_CODE (x1) == CONST_INT)
+ {
+ if (INTVAL (x1) >= 0 && INTVAL (x1) < (1<<20))
+ return true;
+ }
+ return false;
+ }
+
+ default:
+ break;
+ }
+ else if (type == GCN_ADDR_BUFFERED)
+ {
+ return (GET_CODE (x) == REG
+ && (REGNO (x) >= FIRST_PSEUDO_REGISTER
+ || gcn_sgpr_register_operand (x, SImode)));
+ }
+ else if (type == GCN_ADDR_FLAT)
+ {
+ return (GET_CODE (x) == REG
+ && (REGNO (x) >= FIRST_PSEUDO_REGISTER
+ || gcn_vgpr_register_operand (x, DImode)));
+ }
+ else if (type == GCN_ADDR_DS)
+ switch (GET_CODE (x))
+ {
+ case REG:
+ return (REGNO (x) >= FIRST_PSEUDO_REGISTER
+ || gcn_vgpr_register_operand (x, DImode));
+    /* Addresses are in the form BASE+OFFSET.
+	 OFFSET is either a 20-bit unsigned immediate, an SGPR, or M0.
+	 Writes and atomics do not accept SGPR.  */
+ case PLUS:
+ {
+ rtx x0 = XEXP (x, 0);
+ rtx x1 = XEXP (x, 1);
+ if (GET_CODE (x0) != REG
+	      || (REGNO (x0) < FIRST_PSEUDO_REGISTER
+ && !gcn_vgpr_register_operand (x0, DImode)))
+ return false;
+ if (GET_CODE (x1) == REG)
+ {
+	      if (REGNO (x1) == M0_REG
+		  /* FIXME: maybe this is about the parameter of a store.  */
+		  && (type == GCN_ADDR_STORE
+		      || (REGNO (x1) < FIRST_PSEUDO_REGISTER
+			  && !gcn_sgpr_register_operand (x1, DImode))))
+ return false;
+ }
+ else if (GET_CODE (x1) == CONST_VECTOR
+ && GET_CODE (CONST_VECTOR_ELT (x1, 0)) == CONST_INT
+ && single_cst_vector_p (x1))
+ {
+ x1 = CONST_VECTOR_ELT (x1, 0);
+ if (INTVAL (x1) >= 0 && INTVAL (x1) < (1<<20))
+ return true;
+ }
+ return false;
+ }
+
+ default:
+ break;
+ }
+ else
+ gcc_unreachable ();
+ return false;
+}
+
+/* Implement the TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P hook: map the
+   address space to its access type and defer to
+   gcn_legitimate_address_p.  */
+
+static bool
+gcn_addr_space_legitimate_address_p (machine_mode mode, rtx x, bool strict,
+ addr_space_t as)
+{
+ return gcn_legitimate_address_p (mode, x, strict, gcn_addr_space_type (as));
+}
+
+
+/* Return the appropriate mode for a named address pointer. */
+static machine_mode
+gcn_addr_space_pointer_mode (addr_space_t addrspace)
+{
+ switch (addrspace)
+ {
+ case ADDR_SPACE_SCRATCH:
+ case ADDR_SPACE_LDS:
+ case ADDR_SPACE_GDS:
+ return SImode;
+ case ADDR_SPACE_FLAT:
+ case ADDR_SPACE_SCALAR_FLAT:
+ return DImode;
+ default:
+ gcc_unreachable ();
+ }
+}
+
+/* Return the appropriate mode for addresses in a named address space.  */
+static machine_mode
+gcn_addr_space_address_mode (addr_space_t addrspace)
+{
+ return gcn_addr_space_pointer_mode (addrspace);
+}
+
+/* Determine if one named address space is a subset of another. */
+
+static bool
+gcn_addr_space_subset_p (addr_space_t subset, addr_space_t superset)
+{
+ if (subset == superset)
+ return true;
+ if (superset == ADDR_SPACE_FLAT || superset == ADDR_SPACE_SCALAR_FLAT)
+ return true;
+ return false;
+}
+
+/* Convert from one address space to another. */
+static rtx
+gcn_addr_space_convert (rtx /*op*/, tree /*from_type*/, tree /*to_type*/)
+{
+ gcc_unreachable ();
+}
+
+/* Return true if REGNO is OK as a base register for memory addressing.  */
+
+bool
+gcn_regno_mode_code_ok_for_base_p (int regno,
+ machine_mode, addr_space_t, int, int)
+{
+ if (regno >= FIRST_PSEUDO_REGISTER)
+ {
+ if (reg_renumber[regno])
+ regno = reg_renumber[regno];
+ else
+ return true;
+ }
+ return SGPR_REGNO_P (regno);
+}
+
+/* Return the register class usable as a base for memory addressing in
+   address space AS.  */
+
+reg_class
+gcn_mode_code_base_reg_class (machine_mode /*mode*/, addr_space_t as, int, int)
+{
+ switch (gcn_addr_space_type (as))
+ {
+ case GCN_ADDR_LOAD:
+ case GCN_ADDR_STORE:
+ case GCN_ADDR_BUFFERED:
+ return GENERAL_REGS;
+ case GCN_ADDR_FLAT:
+ case GCN_ADDR_DS:
+ return VGPR_REGS;
+ }
+ gcc_unreachable ();
+}
+
+/* Return true if REGNO is OK as an index register for memory addressing.  */
+
+bool
+regno_ok_for_index_p (int regno)
+{
+ if (regno >= FIRST_PSEUDO_REGISTER)
+ {
+ if (reg_renumber[regno])
+ regno = reg_renumber[regno];
+ else
+ return true;
+ }
+ return regno == M0_REG || VGPR_REGNO_P (regno);
+}
+
+/* "Function" calling conventions. */
+
+struct gcn_kernel_arg_type
+{
+ const char *name;
+ const char *header_pseudo;
+ int mode;
+ int fixed_regno;
+};
+
+#define PRIVATE_SEGMENT_BUFFER_ARG 1
+#define KERNARG_SEGMENT_PTR 4
+#define FLAT_SCRATCH_INIT_ARG 6
+#define FLAT_SCRATCH_SEGMENT_SIZE_ARG 7
+#define WORKGROUP_ID_X_ARG 11
+#define PRIVATE_SEGMENT_WAVE_OFFSET_ARG 15
+#define WORK_ITEM_ID_Y_ARG 17
+#define WORK_ITEM_ID_Z_ARG 18
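+
+/* The constants above index gcn_kernel_arg_types below and double as
+   bit positions in the args->requested mask.  */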
+
+static const struct gcn_kernel_arg_type gcn_kernel_arg_types[] =
+{
+ {"exec", NULL, DImode, EXEC_REG},
+ {"private_segment_buffer", "enable_sgpr_private_segment_buffer", TImode, -1},
+ {"dispatch_ptr", "enable_sgpr_dispatch_ptr", DImode, -1},
+ {"queue_ptr", "enable_sgpr_queue_ptr", DImode, -1},
+ {"kernarg_segment_ptr", "enable_sgpr_kernarg_segment_ptr", DImode, -1},
+ {"dispatch_id", "enable_sgpr_dispatch_id", DImode, -1},
+ {"flat_scratch_init", "enable_sgpr_flat_scratch_init", DImode, -1},
+ {"private_segment_size", "enable_sgpr_private_segment_size", SImode, -1},
+ {"grid_workgroup_count_X", "enable_sgpr_grid_workgroup_count_x", SImode, -1},
+ {"grid_workgroup_count_Y", "enable_sgpr_grid_workgroup_count_y", SImode, -1},
+ {"grid_workgroup_count_Z", "enable_sgpr_grid_workgroup_count_z", SImode, -1},
+ {"workgroup_id_X", NULL /*"enable_sgpr_workgroup_id_x"*/, SImode, -1},
+ {"workgroup_id_Y", NULL /*"enable_sgpr_workgroup_id_y"*/, SImode, -1},
+ {"workgroup_id_Z", NULL /*"enable_sgpr_workgroup_id_z"*/, SImode, -1},
+ {"workgroup_info", NULL /*"enable_sgpr_workgroup_info"*/, SImode, -1},
+ {"private_segment_wave_offset",
+ NULL /*"enable_sgpr_private_segment_wave_offset"*/, SImode, -1},
+ {"work_item_id_X", NULL, V64SImode, FIRST_VGPR_REG},
+ {"work_item_id_Y", NULL, V64SImode, FIRST_VGPR_REG + 1},
+ {"work_item_id_Z", NULL, V64SImode, FIRST_VGPR_REG + 2}};
+
+static bool
+gcn_parse_amdgpu_hsa_kernel_attribute (struct gcn_kernel_args *args, tree list)
+{
+ bool err = false;
+ args->requested = (1 << PRIVATE_SEGMENT_BUFFER_ARG)
+ | (1 << FLAT_SCRATCH_INIT_ARG)
+ | (1 << KERNARG_SEGMENT_PTR)
+ | (1 << PRIVATE_SEGMENT_WAVE_OFFSET_ARG);
+ args->nargs = 0;
+
+ for (int a = 0; a < GCN_KERNEL_ARG_TYPES; a++)
+ args->reg[a] = -1;
+
+ for (; list; list = TREE_CHAIN (list))
+ {
+ const char *str;
+ if (TREE_CODE (TREE_VALUE (list)) != STRING_CST)
+ {
+ error ("amdgpu_hsa_kernel attribute requires string constant "
+ "arguments");
+ break;
+ }
+ str = TREE_STRING_POINTER (TREE_VALUE (list));
+ int a;
+ for (a = 0; a < GCN_KERNEL_ARG_TYPES; a++)
+ {
+ if (!strcmp (str, gcn_kernel_arg_types[a].name))
+ break;
+ }
+ if (a == GCN_KERNEL_ARG_TYPES)
+ {
+ error ("unknown specifier %s in amdgpu_hsa_kernel attribute",
+ str);
+ err = true;
+ break;
+ }
+ if (args->requested & (1<<a))
+ {
+ error ("duplicated parameter specifier %s in amdgpu_hsa_kernel "
+ "attribute",
+ str);
+ err = true;
+ break;
+ }
+ args->requested |= (1<<a);
+ args->order[args->nargs++] = a;
+ }
+ args->requested |= (1 << WORKGROUP_ID_X_ARG);
+
+ int sgpr_regno = FIRST_SGPR_REG;
+ for (int a = 0; a < GCN_KERNEL_ARG_TYPES; a++)
+ {
+ if (!(args->requested & (1<<a)))
+ continue;
+
+ if (gcn_kernel_arg_types[a].fixed_regno > 0)
+ args->reg[a] = gcn_kernel_arg_types[a].fixed_regno;
+ else if (gcn_kernel_arg_types[a].mode == SImode)
+ args->reg[a] = sgpr_regno++;
+ else if (gcn_kernel_arg_types[a].mode == DImode)
+ {
+ args->reg[a] = sgpr_regno;
+ sgpr_regno += 2;
+ }
+ else if (gcn_kernel_arg_types[a].mode == TImode)
+ {
+ args->reg[a] = sgpr_regno;
+ sgpr_regno += 4;
+ }
+ else
+ gcc_unreachable ();
+ }
+ args->nsgprs = sgpr_regno;
+ if (sgpr_regno > FIRST_SGPR_REG + 16)
+ {
+ error ("too many arguments passed in sgpr registers");
+ }
+ return err;
+}
+
+static tree
+gcn_handle_amdgpu_hsa_kernel_attribute (tree *node, tree name,
+ tree args, int, bool *no_add_attrs)
+{
+ if (TREE_CODE (*node) != FUNCTION_TYPE
+ && TREE_CODE (*node) != METHOD_TYPE
+ && TREE_CODE (*node) != FIELD_DECL
+ && TREE_CODE (*node) != TYPE_DECL)
+ {
+ warning (OPT_Wattributes, "%qE attribute only applies to functions",
+ name);
+ *no_add_attrs = true;
+ return NULL_TREE;
+ }
+
+  /* Parse the argument list now so that invalid specifiers are
+     diagnosed early.  */
+  if (is_attribute_p ("amdgpu_hsa_kernel", name))
+ {
+ struct gcn_kernel_args kernelarg;
+
+ if (gcn_parse_amdgpu_hsa_kernel_attribute (&kernelarg, args))
+ *no_add_attrs = true;
+
+ return NULL_TREE;
+ }
+
+ return NULL_TREE;
+}
+
+/* Define how to find the value returned by a function.
+ VALTYPE is the data type of the value (as a tree).
+ If the precise function being called is known, FUNC is its FUNCTION_DECL;
+ otherwise, FUNC is 0. */
+
+static rtx
+gcn_function_value (const_tree valtype, const_tree, bool)
+{
+  /* FIXME: There is no way to return a value.
+     The value will simply be ignored.  Perhaps generic code can be
+     taught to handle NULL in this case.  */
+ return gen_rtx_REG (TYPE_MODE (valtype), SGPR_REGNO (0));
+}
+
+/* Return true if N is a possible register number of function value. */
+
+static bool
+gcn_function_value_regno_p (const unsigned int)
+{
+ return false;
+}
+
+/* Find a location for the static chain incoming to a nested function.
+ This is a register, unless all free registers are used by arguments. */
+
+static rtx
+gcn_static_chain (const_tree, bool)
+{
+ /* FIXME: there is no function ABI, so we don't support nested
+ functions either. */
+ return NULL;
+}
+
+
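+/* Return the location of an incoming argument.  Arguments named in the
+   amdgpu_hsa_kernel attribute live in fixed registers; any argument
+   beyond those is read from the kernarg segment, so for it we return a
+   read-only scalar-flat MEM at the argument's offset.  */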
+static rtx
+gcn_function_arg (cumulative_args_t cum_v, machine_mode mode, const_tree type,
+ bool)
+{
+ CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
+ if (cum->num >= cum->args.nargs)
+ {
+ cum->offset = (cum->offset + TYPE_ALIGN (type) / 8 - 1)
+ & -(TYPE_ALIGN (type) / 8);
+ cfun->machine->kernarg_segment_alignment
+ = MAX ((unsigned) cfun->machine->kernarg_segment_alignment,
+ TYPE_ALIGN (type) / 8);
+ rtx addr = gen_rtx_REG (DImode, cum->args.reg [KERNARG_SEGMENT_PTR]);
+ if (cum->offset)
+ addr = gen_rtx_PLUS (DImode, addr, gen_int_mode (cum->offset, mode));
+ rtx mem = gen_rtx_MEM (mode, addr);
+ set_mem_attributes (mem, const_cast<tree>(type), 1);
+ set_mem_addr_space (mem, ADDR_SPACE_SCALAR_FLAT);
+ MEM_READONLY_P (mem) = 1;
+ return mem;
+ }
+
+ int a = cum->args.order[cum->num];
+ if (mode != gcn_kernel_arg_types[a].mode)
+ {
+ error ("wrong type of argument %s", gcn_kernel_arg_types[a].name);
+ return 0;
+ }
+ return gen_rtx_REG ((machine_mode)gcn_kernel_arg_types[a].mode,
+ cum->args.reg[a]);
+}
+
+static void
+gcn_function_arg_advance (cumulative_args_t cum_v, machine_mode,
+ const_tree type, bool)
+{
+ CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
+ if (cum->num < cum->args.nargs)
+ cum->num++;
+ else
+ {
+ cum->offset += tree_to_uhwi (TYPE_SIZE_UNIT (type));
+ cfun->machine->kernarg_segment_byte_size = cum->offset;
+ }
+}
+
+
+/* Initialize a variable CUM of type CUMULATIVE_ARGS for a call to a function
+ whose data type is FNTYPE. For a library call, FNTYPE is 0. */
+
+void
+gcn_init_cumulative_args (CUMULATIVE_ARGS *cum /* Argument info to init */,
+ tree fntype /* tree ptr for function decl */,
+ rtx libname /* SYMBOL_REF of library name or 0 */,
+ tree fndecl,
+ int caller)
+{
+ memset (cum, 0, sizeof (*cum));
+ if (libname)
+ {
+      sorry ("no one has decided on a calling convention yet: cannot "
+	     "expand libcall %s", XSTR (libname, 0));
+ return;
+ }
+ tree attr = NULL;
+ if (fndecl)
+ attr = lookup_attribute ("amdgpu_hsa_kernel", DECL_ATTRIBUTES (fndecl));
+ if (fndecl && !attr)
+ attr = lookup_attribute ("amdgpu_hsa_kernel",
+ TYPE_ATTRIBUTES (TREE_TYPE (fndecl)));
+ if (!attr && fntype)
+ attr = lookup_attribute ("amdgpu_hsa_kernel", TYPE_ATTRIBUTES (fntype));
+  /* Handle main () as a kernel, so we can run the testsuite.  */
+ if (!attr && !caller && fndecl && MAIN_NAME_P (DECL_NAME (fndecl)))
+ gcn_parse_amdgpu_hsa_kernel_attribute (&cum->args, NULL_TREE);
+ else
+ {
+ if (!attr || caller)
+ {
+	  sorry ("no one has decided on a calling convention yet; please "
+		 "just do not call functions");
+ return;
+ }
+ gcn_parse_amdgpu_hsa_kernel_attribute
+ (&cum->args, TREE_VALUE (attr));
+ }
+ cfun->machine->args = cum->args;
+  /* We changed the regno of the frame pointer.  */
+ init_emit_regs ();
+ init_regs ();
+}
+
+/* Expanders. */
+
+/* Return N-th part of value occupying multiple registers. */
+
+rtx
+gcn_operand_part (enum machine_mode mode, rtx op, int n)
+{
+ if (mode == V64DImode || mode == V64SImode)
+ {
+ if (REG_P (op))
+ {
+ gcc_assert (REGNO (op) + n < FIRST_PSEUDO_REGISTER);
+ return gen_rtx_REG (V64SImode, REGNO (op) + n);
+ }
+ if (GET_CODE (op) == CONST_VECTOR)
+ {
+ int units = GET_MODE_NUNITS (mode);
+ rtvec v = rtvec_alloc (units);
+
+ for (int i = 0; i < units; ++i)
+ RTVEC_ELT (v, i) = gcn_operand_part (GET_MODE_INNER (mode),
+ CONST_VECTOR_ELT (op, i), n);
+
+ return gen_rtx_CONST_VECTOR (mode, v);
+ }
+ if (GET_CODE (op) == UNSPEC
+ && XINT (op, 1) == UNSPEC_VECTOR)
+ return gcn_gen_undef (V64SImode);
+ gcc_unreachable ();
+ }
+ else
+ {
+ if (GET_CODE (op) == UNSPEC
+ && XINT (op, 1) == UNSPEC_VECTOR)
+ return gcn_gen_undef (SImode);
+ return simplify_gen_subreg (SImode, op, mode, n * 4);
+ }
+}
+
+/* Return the N-th DImode part of a value occupying multiple registers.  */
+
+rtx
+gcn_operand_doublepart (enum machine_mode mode, rtx op, int n)
+{
+ return simplify_gen_subreg (DImode, op, mode, n * 8);
+}
+
+/* Split all operands in OPERANDS into parts so first NOPERANDS corresponds
+ to the first part and so on. */
+
+void
+gcn_split_operands (enum machine_mode mode, rtx *operands,
+ int nparts, int noperands)
+{
+ for (int i = nparts - 1; i >= 0; i--)
+ for (int j = 0; j < noperands; j++)
+ operands [i*noperands + j] = gcn_operand_part (mode, operands[j], i);
+}
+
+/* Return true if OP can be split.  */
+
+bool
+gcn_can_split_p (enum machine_mode, rtx op)
+{
+ if (vgpr_vector_mode_p (GET_MODE (op)))
+ {
+ if (GET_CODE (op) == SUBREG)
+ op = SUBREG_REG (op);
+ if (!REG_P (op))
+ return true;
+      return REGNO (op) < FIRST_PSEUDO_REGISTER;
+ }
+ return true;
+}
+
+/* Return true if all of OPERANDS can be split by gcn_split_operands.  */
+
+bool
+gcn_can_split_operands_p (enum machine_mode mode, rtx *operands,
+ int noperands)
+{
+ for (int i = noperands - 1; i >= 0; i--)
+ if (!gcn_can_split_p (mode, operands[i]))
+ return false;
+ return true;
+}
+
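+/* Return an UNSPEC_VECTOR rtx representing an undefined value of MODE.
+   It is used as the "previous value" operand of vec_merge when the
+   contents of the unselected lanes do not matter.  */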
+rtx
+gcn_gen_undef (enum machine_mode mode)
+{
+ return gen_rtx_UNSPEC (mode, gen_rtvec (1, const0_rtx), UNSPEC_VECTOR);
+}
+
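+/* Return a CONST_VECTOR of MODE in which every lane holds the constant
+   A.  */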
+rtx
+gcn_vec_constant (enum machine_mode mode, int a)
+{
+ /*if (!a)
+ return CONST0_RTX (mode);
+ if (a == -1)
+ return CONSTM1_RTX (mode);
+ if (a == 1)
+ return CONST1_RTX (mode);
+ if (a == 2)
+ return CONST2_RTX (mode);*/
+
+ int units = GET_MODE_NUNITS (mode);
+ rtx tem = gen_int_mode (a, GET_MODE_INNER (mode));
+ rtvec v = rtvec_alloc (units);
+
+ for (int i = 0; i < units; ++i)
+ RTVEC_ELT (v, i) = tem;
+
+ return gen_rtx_CONST_VECTOR (mode, v);
+}
+
+rtx
+gcn_vec_constant (enum machine_mode mode, rtx a)
+{
+ int units = GET_MODE_NUNITS (mode);
+ rtvec v = rtvec_alloc (units);
+
+ for (int i = 0; i < units; ++i)
+ RTVEC_ELT (v, i) = a;
+
+ return gen_rtx_CONST_VECTOR (mode, v);
+}
+
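+/* Copy the exec mask value VAL into a fresh DImode pseudo so that it
+   can be used as the exec operand of a vector pattern.  */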
+static rtx
+get_exec (int64_t val)
+{
+ rtx reg = gen_reg_rtx (DImode);
+ emit_insn (gen_rtx_SET (reg, gen_int_mode (val, DImode)));
+ return reg;
+}
+
+static rtx
+get_exec (rtx val)
+{
+ rtx reg = gen_reg_rtx (DImode);
+ emit_insn (gen_rtx_SET (reg, val));
+ return reg;
+}
+
+/* Generate move which uses the exec flags. */
+
+rtx
+gen_mov_with_exec (rtx op0, rtx op1, rtx exec = NULL)
+{
+ enum machine_mode mode = GET_MODE (op0);
+ if (vgpr_vector_mode_p (mode))
+ return (gen_rtx_SET (op0,
+ gen_rtx_VEC_MERGE (mode, op1, gcn_gen_undef (mode),
+ exec)));
+ return (gen_rtx_PARALLEL
+ (VOIDmode,
+ gen_rtvec (2, gen_rtx_SET (op0, op1),
+ gen_rtx_USE (VOIDmode, exec ? exec
+ : gcn_scalar_exec ()))));
+}
+
+/* Load vector constant where n-th lane contains BASE+n*VAL. */
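+/* The constant is built with six masked adds, one per bit of the lane
+   number.  For example, with BASE=0 and VAL=1 each lane ends up holding
+   its own lane number:
+
+     start:                    0 0 0 0 0 0 0 0 ...
+     add 1 where exec=0xaa...: 0 1 0 1 0 1 0 1 ...
+     add 2 where exec=0xcc...: 0 1 2 3 0 1 2 3 ...
+     add 4 where exec=0xf0...: 0 1 2 3 4 5 6 7 ...
+     and so on, up to adding 32 to lanes 32-63.  */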
+
+static rtx
+strided_constant (enum machine_mode mode, int base, int val)
+{
+ rtx x = gen_reg_rtx (mode);
+ emit_insn (gen_mov_with_exec (x, gcn_vec_constant (mode, base),
+ gcn_full_exec_reg ()));
+ emit_insn (gen_addv64si3_vector (x, x, gcn_vec_constant (mode, val * 32),
+ get_exec (0xffffffff00000000), x));
+ emit_insn (gen_addv64si3_vector (x, x, gcn_vec_constant (mode, val * 16),
+ get_exec (0xffff0000ffff0000), x));
+ emit_insn (gen_addv64si3_vector (x, x, gcn_vec_constant (mode, val * 8),
+ get_exec (0xff00ff00ff00ff00), x));
+ emit_insn (gen_addv64si3_vector (x, x, gcn_vec_constant (mode, val * 4),
+ get_exec (0xf0f0f0f0f0f0f0f0), x));
+ emit_insn (gen_addv64si3_vector (x, x, gcn_vec_constant (mode, val * 2),
+ get_exec (0xcccccccccccccccc), x));
+ emit_insn (gen_addv64si3_vector (x, x, gcn_vec_constant (mode, val * 1),
+ get_exec (0xaaaaaaaaaaaaaaaa), x));
+ return x;
+}
+
+/* Prepare address to load vector of MODE from address OP. */
+
+static rtx
+prepare_flat_address (enum machine_mode mode, rtx op, rtx exec)
+{
+ rtx addrs = gen_reg_rtx (V64DImode);
+ rtx base = force_reg (DImode, XEXP (op, 0));
+ rtx offsets = strided_constant (V64SImode, 0, GET_MODE_UNIT_SIZE (mode));
+
+ emit_insn (gen_vec_duplicatev64di
+ (addrs, base, exec, gcn_gen_undef (V64DImode)));
+
+ emit_insn (gen_addv64di3_zext (addrs, offsets, addrs, exec,
+ gcn_gen_undef (V64DImode)));
+ return change_address (op, GET_MODE (op), addrs);
+}
+
+/* Legitimize address X.  */
+
+static rtx
+gcn_addr_space_legitimize_address (rtx x, rtx, machine_mode mode,
+ addr_space_t as)
+{
+ switch (gcn_addr_space_type (as))
+ {
+ case GCN_ADDR_LOAD:
+ case GCN_ADDR_STORE:
+ case GCN_ADDR_BUFFERED:
+ return x;
+ case GCN_ADDR_FLAT:
+ if (vgpr_vector_mode_p (mode)
+ && GET_MODE (x) != V64DImode)
+ {
+ rtx exec = gcn_full_exec_reg ();
+ rtx addrs = gen_reg_rtx (V64DImode);
+ rtx base = force_reg (DImode, x);
+ rtx offsets = strided_constant (V64SImode, 0,
+ GET_MODE_UNIT_SIZE (mode));
+
+ emit_insn (gen_vec_duplicatev64di
+ (addrs, base, exec, gcn_gen_undef (V64DImode)));
+
+ emit_insn (gen_addv64di3_zext (addrs, offsets, addrs, exec,
+ gcn_gen_undef (V64DImode)));
+ return addrs;
+	}
+      /* Non-vector flat accesses need no rewriting.  */
+      return x;
+    case GCN_ADDR_DS:
+      /* FIXME: LDS supports offsets; handle them!  */
+ if (vgpr_vector_mode_p (mode)
+ && GET_MODE (x) != V64SImode)
+ {
+ rtx exec = gcn_full_exec_reg ();
+ rtx addrs = gen_reg_rtx (V64SImode);
+ rtx base = force_reg (SImode, x);
+ rtx offsets = strided_constant (V64SImode, 0,
+ GET_MODE_UNIT_SIZE (mode));
+
+ emit_insn (gen_vec_duplicatev64si
+ (addrs, base, exec, gcn_gen_undef (V64SImode)));
+
+ emit_insn (gen_addv64si3_vector (addrs, offsets, addrs, exec,
+ gcn_gen_undef (V64SImode)));
+ return addrs;
+ }
+ return x;
+ }
+ gcc_unreachable ();
+}
+
+/* Move expander: move OP1 to OP0 in MODE.
+   Only handle special cases that cannot be handled directly.
+   Return true if RTL was generated.  */
+
+bool
+gcn_expand_mov (enum machine_mode mode, rtx op0, rtx op1)
+{
+ /* At least one of the operands needs to be a register. */
+ if (!register_operand (op0, mode) && !register_operand (op1, mode))
+ {
+ rtx temp = force_reg (mode, op1);
+ emit_move_insn (op0, temp);
+ return 1;
+ }
+ if (GET_MODE (op0) == QImode || GET_MODE (op0) == HImode)
+ {
+ /* We do not have sub-word register move patterns; just promote to
+ SImode. */
+ if (!memory_operand (op0, mode) && !memory_operand (op1, mode))
+ {
+ op0 = simplify_gen_subreg (SImode, op0, mode, 0);
+ op1 = simplify_gen_subreg (SImode, op1, mode, 0);
+ }
+      /* Scalar flat loads do not support sub-word accesses; emulate them.
+	 The hardware ignores the low two bits of the address, so we load
+	 from the containing aligned dword and shift the value into place.  */
+ else if (GET_CODE (op1) == MEM
+ && MEM_ADDR_SPACE (op1) == ADDR_SPACE_SCALAR_FLAT)
+ {
+ rtx addr = force_reg (DImode, XEXP (op1, 0));
+ int align = get_mem_align_offset (op1, 32);
+ rtx tmp = gen_reg_rtx (SImode);
+
+ if (align >= 0)
+ {
+ rtx mem = widen_memory_access (op1, SImode, -align / 8);
+ emit_move_insn (tmp, mem);
+ if (align)
+ tmp
+ = expand_simple_binop (SImode, ASHIFTRT, tmp,
+ GEN_INT (align * 8), NULL_RTX, 1,
+ OPTAB_DIRECT);
+ emit_move_insn (simplify_gen_subreg (SImode, op0, mode, 0),
+ tmp);
+ }
+ else
+ {
+	  rtx aligned = gen_reg_rtx (DImode);
+	  emit_move_insn (aligned, addr);
+	  /* Clear the low two bits to get the dword-aligned address.  */
+	  emit_insn (gen_andsi3 (simplify_gen_subreg (SImode, aligned,
+						      DImode, 0),
+				 simplify_gen_subreg (SImode, aligned,
+						      DImode, 0),
+				 gen_int_mode (-4, SImode)));
+	  rtx mem = change_address (op1, SImode, aligned);
+	  set_mem_align (mem, 32);
+ emit_move_insn (tmp, mem);
+ rtx offset
+ = expand_simple_binop (SImode, AND,
+ simplify_gen_subreg (SImode, addr,
+ DImode, 0),
+ gen_int_mode (3, DImode),
+ NULL_RTX, 1, OPTAB_DIRECT);
+ offset
+ = expand_simple_binop (SImode, ASHIFT, offset,
+ gen_int_mode (3, DImode), NULL_RTX, 1,
+ OPTAB_DIRECT);
+ rtx tmp2
+ = expand_simple_binop (SImode, ASHIFTRT, tmp, offset, NULL_RTX,
+ 1, OPTAB_DIRECT);
+ emit_move_insn (simplify_gen_subreg (SImode, op0, mode, 0), tmp2);
+ }
+ return 1;
+ }
+ }
+
+ if (GET_CODE (op1) == CONST_VECTOR && !gcn_constant_p (op1))
+ {
+ gcn_expand_vector_init (op0, op1);
+ return 1;
+ }
+ bool reg_exec = gcn_vgpr_move_p (op0, op1);
+ /* Scalar flat load. */
+ if (GET_CODE (op1) == MEM && MEM_ADDR_SPACE (op1) == ADDR_SPACE_SCALAR_FLAT)
+ return 0;
+ /* Scalar flat store. */
+ if (GET_CODE (op0) == MEM && MEM_ADDR_SPACE (op0) == ADDR_SPACE_SCALAR_FLAT)
+ {
+ sorry ("scalar flat stores are not supported yet");
+ return 0;
+ }
+
+ rtx exec
+ = vgpr_vector_mode_p (mode) ? gcn_full_exec () : gcn_scalar_exec ();
+ if (reg_exec && !REG_P (exec))
+ exec = get_exec (exec);
+
+ /* Buffer load/stores for scratch memory segment. */
+ if ((GET_CODE (op0) == MEM
+ && MEM_ADDR_SPACE (op0) == ADDR_SPACE_SCRATCH)
+ || (GET_CODE (op1) == MEM
+ && MEM_ADDR_SPACE (op1) == ADDR_SPACE_SCRATCH))
+ {
+ emit_insn (gen_rtx_PARALLEL (VOIDmode,
+ gen_rtvec (3, gen_rtx_SET (op0, op1),
+ gen_rtx_USE (VOIDmode,
+ gen_rtx_REG
+ (TImode, 0)),
+ gen_rtx_USE (VOIDmode, exec))));
+ return 1;
+ }
+ /* Stores to hard registers can be optimized because we know if there
+ will be a need for exec or not. */
+ if (gcn_sgpr_register_operand (op0, mode)
+ && (gcn_sgpr_register_operand (op1, mode)
+ || !register_operand (op1, mode)))
+ return 0;
+
+  /* LRA needs to have memory among the alternatives.  Arrange this by
+     always expanding the buffer load/store pattern, which also allows
+     reg-reg moves.  */
+ if (lra_in_progress && !vgpr_vector_mode_p (mode) && !reg_exec)
+ {
+ emit_insn (gen_rtx_PARALLEL (VOIDmode,
+ gen_rtvec (4, gen_rtx_SET (op0, op1),
+ gen_rtx_USE (VOIDmode,
+ gen_rtx_REG
+ (TImode, 0)),
+ gen_rtx_USE (VOIDmode, exec),
+ gen_rtx_CLOBBER (VOIDmode,
+ gen_reg_rtx
+ (DImode)))));
+ return 1;
+ }
+ emit_insn (gen_mov_with_exec (op0, op1, exec));
+ return 1;
+}
+
+/* Generate masked move. */
+
+rtx
+gen_masked_mov (rtx op0, rtx op1, rtx exec)
+{
+ return (gen_rtx_SET (op0,
+ gen_rtx_VEC_MERGE (GET_MODE (op0),
+ op1, op0, exec)));
+}
+
+/* Generate a masked scalar load: broadcast OP1 into the lanes of OP0
+   selected by EXEC, taking the remaining lanes from OP2.  */
+
+rtx
+gen_masked_scalar_load (rtx op0, rtx op1, rtx op2, rtx exec)
+{
+ return (gen_rtx_SET (op0,
+ gen_rtx_VEC_MERGE (GET_MODE (op0),
+ gen_rtx_VEC_DUPLICATE (GET_MODE (op0),
+ op1),
+ op2, exec)));
+}
+
+/* Expand vector init of OP0 by VEC. */
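+/* Each distinct value is written only once: the loop gathers the mask
+   of all lanes sharing a value and stores it to all of them with a
+   single masked move (or a masked scalar load when the value is not a
+   constant).  */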
+
+void
+gcn_expand_vector_init (rtx op0, rtx vec)
+{
+ int64_t initialized_mask = 0;
+ int64_t curr_mask = 1;
+ machine_mode mode = GET_MODE (op0);
+
+ rtx val = XVECEXP (vec, 0, 0);
+
+ for (int i = 1; i < 64; i++)
+ if (rtx_equal_p (val, XVECEXP (vec, 0, i)))
+ curr_mask |= (int64_t)1 << i;
+
+ if (gcn_constant_p (val))
+ emit_insn (gen_mov_with_exec (op0, gcn_vec_constant (mode, val),
+ gcn_full_exec_reg ()));
+ else
+ {
+ val = force_reg (GET_MODE_INNER (mode), val);
+ emit_insn (gen_masked_scalar_load (op0, val, gcn_gen_undef (mode),
+ gcn_full_exec_reg ()));
+ }
+ initialized_mask |= curr_mask;
+ for (int i = 1; i < 64; i++)
+ if (!(initialized_mask & ((int64_t)1 << i)))
+ {
+ curr_mask = (int64_t)1 << i;
+ rtx val = XVECEXP (vec, 0, i);
+
+ for (int j = i + 1; j < 64; j++)
+ if (rtx_equal_p (val, XVECEXP (vec, 0, j)))
+ curr_mask |= (int64_t)1 << j;
+ if (gcn_constant_p (val))
+ emit_insn (gen_masked_mov (op0, gcn_vec_constant (mode, val),
+ get_exec (curr_mask)));
+ else
+ {
+ val = force_reg (GET_MODE_INNER (mode), val);
+ emit_insn (gen_masked_scalar_load (op0, val, op0,
+ get_exec (curr_mask)));
+ }
+ initialized_mask |= curr_mask;
+ }
+}
+
+/* Return true if a move from OP1 to OP0 is known to be executed in the
+   vector unit.  */
+
+bool
+gcn_vgpr_move_p (rtx op0, rtx op1)
+{
+ if (MEM_P (op0) && MEM_ADDR_SPACE (op0) != ADDR_SPACE_SCALAR_FLAT)
+ return true;
+ if (MEM_P (op1) && MEM_ADDR_SPACE (op1) != ADDR_SPACE_SCALAR_FLAT)
+ return true;
+ return ((REG_P (op0) && VGPR_REGNO_P (REGNO (op0)))
+ || (REG_P (op1) && VGPR_REGNO_P (REGNO (op1)))
+ || vgpr_vector_mode_p (GET_MODE (op0)));
+}
+
+bool
+gcn_sgpr_move_p (rtx op0, rtx op1)
+{
+ if (MEM_P (op0) && MEM_ADDR_SPACE (op0) == ADDR_SPACE_SCALAR_FLAT)
+ return true;
+ if (MEM_P (op1) && MEM_ADDR_SPACE (op1) == ADDR_SPACE_SCALAR_FLAT)
+ return true;
+  if (!REG_P (op0) || REGNO (op0) >= FIRST_PSEUDO_REGISTER
+ || VGPR_REGNO_P (REGNO (op0)))
+ return false;
+ if (REG_P (op1)
+ && REGNO (op1) < FIRST_PSEUDO_REGISTER
+ && !VGPR_REGNO_P (REGNO (op1)))
+ return true;
+ return immediate_operand (op1, VOIDmode) || memory_operand (op1, VOIDmode);
+}
+
+static void
+output_file_start (void)
+{
+ fprintf (asm_out_file, "\t.hsatext\n");
+ fprintf (asm_out_file, "\t.hsa_code_object_version 2,0\n");
+ fprintf (asm_out_file, "\t.hsa_code_object_isa\n"); /* Autodetect. */
+ fprintf (asm_out_file, "\t.section\t.AMDGPU.config\n");
+ fprintf (asm_out_file, "\t.hsatext\n");
+}
+
+/* This function produces the initial definition of a function name. */
+
+void
+gcn_hsa_declare_function_name (FILE *file, const char *name, tree)
+{
+ int sgpr, vgpr;
+ bool xnack_enabled = false;
+ int extra_regs = 0;
+
+ /* Determine count of sgpr/vgpr registers by looking for last
+ one used. */
+ for (sgpr = 101; sgpr >= 0; sgpr--)
+ if (df_regs_ever_live_p (FIRST_SGPR_REG + sgpr))
+ break;
+ sgpr++;
+ for (vgpr = 255; vgpr >= 0; vgpr--)
+ if (df_regs_ever_live_p (FIRST_VGPR_REG + vgpr))
+ break;
+ vgpr++;
+
+ if (xnack_enabled)
+ extra_regs = 6;
+  else if (df_regs_ever_live_p (FLAT_SCRATCH_LO_REG)
+ || df_regs_ever_live_p (FLAT_SCRATCH_HI_REG))
+ extra_regs = 4;
+ else if (df_regs_ever_live_p (VCC_LO_REG) || df_regs_ever_live_p (VCC_HI_REG))
+ extra_regs = 2;
+
+ fprintf (file, "\t.type\t%s,@function\n", name);
+ fprintf (file, "\t.amdgpu_hsa_kernel\t%s\n", name);
+ fprintf (file, "%s:\n", name);
+ fprintf (file, "\t.amd_kernel_code_t\n"
+ "\t\tkernel_code_version_major = 1\n"
+ "\t\tkernel_code_version_minor = 0\n"
+ "\t\tmachine_kind = 1\n"
+ "\t\tmachine_version_major = 8\n"
+ "\t\tmachine_version_minor = 0\n"
+ "\t\tmachine_version_stepping = 1\n"
+ "\t\tkernel_code_entry_byte_offset = 256\n"
+ "\t\tkernel_code_prefetch_byte_size = 0\n"
+ "\t\tmax_scratch_backing_memory_byte_size = 0\n"
+ "\t\tcompute_pgm_rsrc1_vgprs = %i\n"
+ "\t\tcompute_pgm_rsrc1_sgprs = %i\n"
+ "\t\tcompute_pgm_rsrc1_priority = 0\n"
+ "\t\tcompute_pgm_rsrc1_float_mode = 192\n"
+ "\t\tcompute_pgm_rsrc1_priv = 0\n"
+ "\t\tcompute_pgm_rsrc1_dx10_clamp = 1\n"
+ "\t\tcompute_pgm_rsrc1_debug_mode = 0\n"
+ "\t\tcompute_pgm_rsrc1_ieee_mode = 1\n"
+ /* We enable scratch memory. */
+ "\t\tcompute_pgm_rsrc2_scratch_en = 1\n"
+ "\t\tcompute_pgm_rsrc2_user_sgpr = %i\n"
+ "\t\tcompute_pgm_rsrc2_tgid_x_en = 1\n"
+ "\t\tcompute_pgm_rsrc2_tgid_y_en = 0\n"
+ "\t\tcompute_pgm_rsrc2_tgid_z_en = 0\n"
+ "\t\tcompute_pgm_rsrc2_tg_size_en = 0\n"
+ "\t\tcompute_pgm_rsrc2_tidig_comp_cnt = 0\n"
+ "\t\tcompute_pgm_rsrc2_excp_en_msb = 0\n"
+ "\t\tcompute_pgm_rsrc2_lds_size = 8\n" /*FIXME */
+ "\t\tcompute_pgm_rsrc2_excp_en = 0\n",
+ (vgpr - 1) / 4,
+ /* Must match wavefront_sgpr_count */
+ (sgpr + extra_regs - 1) / 8,
+ /* The total number of SGPR user data registers requested. This
+ number must match the number of user data registers enabled. */
+ cfun->machine->args.nsgprs - 2);
+ for (int a = 0; a < GCN_KERNEL_ARG_TYPES; a++)
+ if (gcn_kernel_arg_types[a].header_pseudo)
+ fprintf (file, "\t\t%s = %i\n",
+ gcn_kernel_arg_types[a].header_pseudo,
+ (cfun->machine->args.requested & (1<<a)) != 0);
+ /*fprintf (file, "\t\tenable_vgpr_workitem_id = %i\n",
+ (cfun->machine->args.requested & (1 << WORK_ITEM_ID_Z_ARG))
+ ? 2
+ : cfun->machine->args.requested & (1 << WORK_ITEM_ID_Y_ARG)
+ ? 1 : 0);*/
+ fprintf (file, "\t\tenable_ordered_append_gds = 0\n"
+ "\t\tprivate_element_size = 1\n"
+ "\t\tis_ptr64 = 1\n"
+ "\t\tis_dynamic_callstack = 0\n"
+ "\t\tis_debug_enabled = 0\n"
+ "\t\tis_xnack_enabled = %i\n"
+ "\t\tworkitem_private_segment_byte_size = %i\n"
+ "\t\tworkgroup_group_segment_byte_size = 0\n"
+ "\t\tgds_segment_byte_size = 0\n"
+ "\t\tkernarg_segment_byte_size = %i\n"
+ "\t\tworkgroup_fbarrier_count = 0\n"
+ "\t\twavefront_sgpr_count = %i\n"
+ "\t\tworkitem_vgpr_count = %i\n"
+ "\t\treserved_vgpr_first = 0\n"
+ "\t\treserved_vgpr_count = 0\n"
+ "\t\treserved_sgpr_first = 0\n"
+ "\t\treserved_sgpr_count = 0\n"
+ "\t\tdebug_wavefront_private_segment_offset_sgpr = 0\n"
+ "\t\tdebug_private_segment_buffer_sgpr = 0\n"
+ "\t\tkernarg_segment_alignment = %i\n"
+ "\t\tgroup_segment_alignment = 4\n"
+ "\t\tprivate_segment_alignment = %i\n"
+ "\t\twavefront_size = 6\n"
+ "\t\tcall_convention = 0\n"
+ "\t\truntime_loader_kernel_symbol = 0\n"
+ "\t.end_amd_kernel_code_t\n",
+ xnack_enabled,
+ (int)get_frame_size (),
+ cfun->machine->kernarg_segment_byte_size,
+ /* Number of scalar registers used by a wavefront. This
+ includes the special SGPRs for VCC, Flat Scratch (Base,
+ Size) and XNACK (for GFX8 (VI)+). It does not include the
+ 16 SGPR added if a trap handler is enabled. Must match
+ compute_pgm_rsrc1.sgprs. */
+ sgpr + extra_regs, vgpr,
+ cfun->machine->kernarg_segment_alignment,
+ crtl->stack_alignment_needed / 8);
+}
+
+/* Generate prologue. */
+
+void
+gcn_expand_prologue ()
+{
+  /* Flat access to LDS requires aperture setup and M0 register setup.  */
+ emit_insn (gen_rtx_SET (gen_rtx_REG (SImode, M0_REG),
+ gen_int_mode (65536, SImode)));
+  /* Flat access also needs the flat_scratch register initialized.  */
+ if (cfun->machine->args.reg[FLAT_SCRATCH_INIT_ARG] > 0)
+ {
+ rtx arg = gen_rtx_REG (DImode,
+ cfun->machine->args.reg[FLAT_SCRATCH_INIT_ARG]);
+ emit_insn (gen_lshrsi3_scalar (gcn_operand_part
+ (DImode, gen_rtx_REG (DImode,
+ FLAT_SCRATCH_REG),
+ 1),
+ gcn_operand_part (DImode, arg, 0),
+ gen_int_mode (8, SImode)));
+ emit_insn (gen_rtx_SET (gcn_operand_part (DImode,
+ gen_rtx_REG (DImode,
+ FLAT_SCRATCH_REG),
+ 0),
+ gcn_operand_part (DImode, arg, 1)));
+ }
+ if (df_regs_ever_live_p (HARD_FRAME_POINTER_REGNUM))
+ emit_insn (gen_rtx_SET (gen_rtx_REG (SImode, HARD_FRAME_POINTER_REGNUM),
+ gen_rtx_REG (SImode,
+ cfun->machine->args.reg
+ [PRIVATE_SEGMENT_WAVE_OFFSET_ARG])));
+ emit_insn (gen_prologue_use (gen_rtx_REG (SImode, M0_REG)));
+}
+
+/* Compute a (partial) cost for rtx X. Return true if the complete
+ cost has been computed, and false if subexpressions should be
+ scanned. In either case, *TOTAL contains the cost result. */
+
+static bool
+gcn_rtx_costs (rtx x, machine_mode, int, int, int *total, bool)
+{
+ enum rtx_code code = GET_CODE (x);
+ switch (code)
+ {
+ case CONST:
+ case CONST_DOUBLE:
+ case CONST_VECTOR:
+ case CONST_INT:
+ if (gcn_inline_constant_p (x))
+ *total = 0;
+      else if (code == CONST_INT
+ && ((unsigned HOST_WIDE_INT) INTVAL (x) + 0x8000) < 0x10000)
+ *total = 1;
+ else if (gcn_constant_p (x))
+ *total = 2;
+ else
+ *total = vgpr_vector_mode_p (GET_MODE (x)) ? 64 : 4;
+ return true;
+
+ default:
+ *total = 3;
+ return false;
+ }
+}
+
+/* Return the cost of moving data of mode M between a
+   register and memory.  A value of 2 is the default; this cost is
+   relative to those in `REGISTER_MOVE_COST'.
+
+   If moving between registers and memory is more expensive than
+   between two registers, this expresses the relative cost.  */
+
+#define LOAD_COST 32
+#define STORE_COST 32
+static int
+gcn_memory_move_cost (machine_mode mode, reg_class_t regclass, bool in)
+{
+ int nregs = CEIL (GET_MODE_SIZE (mode), 4);
+ switch (regclass)
+ {
+ case SCC_CONDITIONAL_REG:
+ case VCCZ_CONDITIONAL_REG:
+ case VCC_CONDITIONAL_REG:
+ case EXECZ_CONDITIONAL_REG:
+ case ALL_CONDITIONAL_REGS:
+ case SGPR_REGS:
+ case SGPR_EXEC_REGS:
+ case EXEC_MASK_REG:
+ case SGPR_VOP3A_SRC_REGS:
+ case SGPR_MEM_SRC_REGS:
+ case SGPR_SRC_REGS:
+ case SGPR_DST_REGS:
+ case GENERAL_REGS:
+      if (!in)
+	return (STORE_COST + 2) * nregs;
+      return LOAD_COST * nregs;
+    case VGPR_REGS:
+      if (in)
+	return (LOAD_COST + 2) * nregs;
+      return STORE_COST * nregs;
+    case ALL_REGS:
+    case SRCDST_REGS:
+      if (in)
+	return (LOAD_COST + 2) * nregs;
+      return (STORE_COST + 2) * nregs;
+ default:
+ gcc_unreachable ();
+ }
+}
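+
+/* For example, loading a V64SImode value (256 bytes, so NREGS = 64) into
+   VGPR_REGS costs (LOAD_COST + 2) * 64 = 2176, against a base
+   register-register cost of 2, which strongly discourages spilling full
+   vectors to memory.  */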
+
+/* Return the cost of moving data from a register in class CLASS1 to
+ one in class CLASS2. Base value is 2. */
+static int
+gcn_register_move_cost (machine_mode, reg_class_t dst,
+ reg_class_t src)
+{
+  /* Increase the cost of moving to and from vector registers.  While such
+     moves are fast in hardware, they have the hidden cost of setting up the
+     EXEC flags.  */
+ if ((src < VGPR_REGS) != (dst < VGPR_REGS))
+ return 4;
+ return 2;
+}
+
+/* Return class of registers which could be used for pseudo of MODE
+ and of class RCLASS for spilling instead of memory. Return NO_REGS
+ if it is not possible or non-profitable. */
+
+static reg_class_t
+gcn_spill_class (reg_class_t /*c*/, machine_mode /*mode*/)
+{
+  return SGPR_REGS;
+}
+
+/* Change allocno class for given pseudo from allocno and best class calculated
+ by IRA.
+
+ Be sure we do not try to spill into memory, since this is not supported
+ (yet). */
+
+static reg_class_t
+gcn_ira_change_pseudo_allocno_class (int, reg_class_t cl, reg_class_t)
+{
+ return cl;
+}
+
+/* Target hook for scalar_mode_supported_p. */
+
+static bool
+gcn_scalar_mode_supported_p (machine_mode mode)
+{
+ return default_scalar_mode_supported_p (mode);
+}
+
+/* Implements target hook vector_mode_supported_p. */
+
+static bool
+gcn_vector_mode_supported_p (machine_mode mode)
+{
+ return mode == V64SImode || mode == V64DImode || mode == V64SFmode;
+}
+
+/* Initialize machine_function. */
+
+static struct machine_function *
+gcn_init_machine_status (void)
+{
+ struct machine_function *f;
+
+ f = ggc_cleared_alloc<machine_function> ();
+ f->exec_reg = NULL_RTX;
+ return f;
+}
+
+/* Implement the TARGET_OPTION_OVERRIDE hook. */
+
+static void
+gcn_option_override (void)
+{
+ init_machine_status = gcn_init_machine_status;
+}
+
+/* Return pseudo holding default exec register. */
+
+rtx
+gcn_default_exec ()
+{
+  /* The pseudo is created lazily; during RTL initialization we can be
+     called before the function CFG is built.  */
+  if (!cfun->machine->exec_reg)
+    cfun->machine->exec_reg = gen_reg_rtx (DImode);
+  return cfun->machine->exec_reg;
+}
+
+/* Return value of scalar exec register. */
+
+rtx
+gcn_scalar_exec ()
+{
+ return const1_rtx;
+}
+
+/* Return value of full exec register. */
+
+rtx
+gcn_full_exec ()
+{
+ return constm1_rtx;
+}
+
+/* Return pseudo holding full exec register. */
+
+rtx
+gcn_full_exec_reg ()
+{
+ return get_exec (-1);
+}
+
+/* Set live registers passed to the kernel. */
+
+static void
+gcn_live_on_entry (bitmap regs)
+{
+ /* This register holds the default exec mask. */
+ bitmap_set_bit (regs, EXEC_LO_REG);
+ bitmap_set_bit (regs, EXEC_HI_REG);
+  /* This register holds a vector containing the value N in the N-th lane,
+     for every lane where the EXEC mask is set.  */
+ bitmap_set_bit (regs, FIRST_VGPR_REG);
+}
+
+/* Table of valid machine attributes. */
+static const struct attribute_spec gcn_attribute_table[] =
+{
+ /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
+ affects_type_identity } */
+ { "amdgpu_hsa_kernel", 0, GCN_KERNEL_ARG_TYPES, false, true, true,
+ gcn_handle_amdgpu_hsa_kernel_attribute, true },
+ /* End element. */
+ { NULL, 0, 0, false, false, false, NULL, false }
+};
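+
+/* For example (illustrative only; the accepted arguments are validated by
+   gcn_handle_amdgpu_hsa_kernel_attribute), a kernel entry point might be
+   declared in user code as:
+
+     void __attribute__((amdgpu_hsa_kernel)) kernel_main (void);  */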
+
+/* When this hook returns true for MODE, the compiler allows
+ registers explicitly used in the rtl to be used as spill registers
+ but prevents the compiler from extending the lifetime of these
+ registers. */
+
+bool
+gcn_small_register_classes_for_mode_p (machine_mode mode)
+{
+  /* We allocate into the EXEC and VCC regs, which form small register
+     classes.  */
+ return mode == DImode || mode == SImode;
+}
+
+/* Vector registers are wide, and we cannot directly take a word-sized
+   subreg of them.  */
+
+int
+gcn_regmode_natural_size (enum machine_mode mode)
+{
+ if (vgpr_vector_mode_p (mode))
+ return GET_MODE_SIZE (mode);
+ return 4;
+}
+
+enum gcn_builtin_type_index
+{
+ GCN_BTI_END_OF_PARAMS,
+
+ GCN_BTI_VOID,
+ GCN_BTI_EXEC,
+
+ GCN_BTI_V64SI,
+ GCN_BTI_V64SF,
+ GCN_BTI_V64PTR,
+ GCN_BTI_SIPTR,
+ GCN_BTI_SFPTR,
+
+ GCN_BTI_MAX
+};
+
+static GTY(()) tree gcn_builtin_types[GCN_BTI_MAX];
+
+#define exec_type_node (gcn_builtin_types[GCN_BTI_EXEC])
+#define v64si_type_node (gcn_builtin_types[GCN_BTI_V64SI])
+#define v64sf_type_node (gcn_builtin_types[GCN_BTI_V64SF])
+#define v64ptr_type_node (gcn_builtin_types[GCN_BTI_V64PTR])
+#define siptr_type_node (gcn_builtin_types[GCN_BTI_SIPTR])
+#define sfptr_type_node (gcn_builtin_types[GCN_BTI_SFPTR])
+
+static rtx gcn_expand_builtin_1 (tree, rtx, rtx, machine_mode, int,
+ struct gcn_builtin_description *);
+static rtx gcn_expand_builtin_binop (tree, rtx, rtx, machine_mode, int,
+ struct gcn_builtin_description *);
+
+struct gcn_builtin_description;
+typedef rtx (*gcn_builtin_expander) (tree, rtx, rtx, machine_mode, int,
+ struct gcn_builtin_description *);
+
+enum gcn_builtin_type
+{
+  B_UNIMPLEMENTED, /* Not implemented; expanding it reports sorry.  */
+ B_INSN, /* Emit a pattern */
+ B_OVERLOAD, /* Placeholder for an overloaded function */
+};
+
+struct gcn_builtin_description
+{
+ int fcode;
+ int icode;
+ const char *name;
+ enum gcn_builtin_type type;
+  /* The first element of parm is always the return type.  The rest
+     are a zero-terminated list of parameters.  */
+ int parm[6];
+ gcn_builtin_expander expander;
+};
+
+
+/* Codes for all the GCN builtins. */
+
+enum gcn_builtin_codes
+{
+#define DEF_BUILTIN(fcode, icode, name, type, params, expander) \
+ GCN_BUILTIN_ ## fcode,
+#define DEF_BUILTIN_BINOP_INT_FP(fcode, ic, name) \
+ GCN_BUILTIN_ ## fcode ## _V64SI, \
+ GCN_BUILTIN_ ## fcode ## _V64SI_unspec,
+#include "gcn-builtins.def"
+#undef DEF_BUILTIN
+#undef DEF_BUILTIN_BINOP_INT_FP
+ GCN_BUILTIN_MAX
+};
+
+extern GTY(()) struct gcn_builtin_description gcn_builtins[GCN_BUILTIN_MAX];
+
+struct gcn_builtin_description gcn_builtins[] = {
+#define DEF_BUILTIN(fcode, icode, name, type, params, expander) \
+ {GCN_BUILTIN_ ## fcode, icode, name, type, params, expander},
+
+#define DEF_BUILTIN_BINOP_INT_FP(fcode, ic, name) \
+ {GCN_BUILTIN_ ## fcode ## _V64SI, \
+ CODE_FOR_ ## ic ##v64si3_vector, name "_v64int", B_INSN, \
+ {GCN_BTI_V64SI, GCN_BTI_EXEC, GCN_BTI_V64SI, GCN_BTI_V64SI, \
+ GCN_BTI_V64SI, GCN_BTI_END_OF_PARAMS}, gcn_expand_builtin_binop}, \
+ {GCN_BUILTIN_ ## fcode ## _V64SI_unspec, \
+ CODE_FOR_ ## ic ##v64si3_vector, name "_v64int_unspec", B_INSN, \
+ {GCN_BTI_V64SI, GCN_BTI_EXEC, GCN_BTI_V64SI, GCN_BTI_V64SI, \
+ GCN_BTI_END_OF_PARAMS}, gcn_expand_builtin_binop},
+
+#include "gcn-builtins.def"
+#undef DEF_BUILTIN_BINOP_INT_FP
+#undef DEF_BUILTIN
+};
+
+static GTY(()) tree gcn_builtin_decls[GCN_BUILTIN_MAX];
+
+/* Return the GCN builtin for CODE. */
+
+static tree
+gcn_builtin_decl (unsigned code, bool ARG_UNUSED (initialize_p))
+{
+ if (code >= GCN_BUILTIN_MAX)
+ return error_mark_node;
+
+ return gcn_builtin_decls[code];
+}
+
+static void
+gcn_init_builtin_types (void)
+{
+ gcn_builtin_types[GCN_BTI_VOID] = void_type_node;
+ exec_type_node = unsigned_intDI_type_node;
+ v64si_type_node = build_vector_type (intSI_type_node, 64);
+ v64sf_type_node = build_vector_type (float_type_node, 64);
+ v64ptr_type_node
+ = build_vector_type (unsigned_intDI_type_node
+ /*build_pointer_type (integer_type_node)*/, 64);
+ tree tmp = build_distinct_type_copy (intSI_type_node);
+ TYPE_ADDR_SPACE (tmp) = ADDR_SPACE_FLAT;
+ siptr_type_node = build_pointer_type (tmp);
+
+ tmp = build_distinct_type_copy (float_type_node);
+ TYPE_ADDR_SPACE (tmp) = ADDR_SPACE_FLAT;
+ sfptr_type_node = build_pointer_type (tmp);
+}
+
+/* Set up all builtin functions for this target. */
+
+static void
+gcn_init_builtins (void)
+{
+ gcn_init_builtin_types ();
+
+ struct gcn_builtin_description *d;
+ unsigned int i;
+ for (i = 0, d = gcn_builtins; i < GCN_BUILTIN_MAX; i++, d++)
+ {
+ tree p;
+      char name[64]; /* add_builtin_function will make a copy.  */
+ int parm;
+
+ /* FIXME: Is this necessary/useful? */
+ if (d->name == 0)
+ continue;
+
+ /* Find last parm. */
+ for (parm = 1; d->parm[parm] != GCN_BTI_END_OF_PARAMS; parm++)
+ ;
+
+ p = void_list_node;
+ while (parm > 1)
+ p = tree_cons (NULL_TREE, gcn_builtin_types[d->parm[--parm]], p);
+
+ p = build_function_type (gcn_builtin_types[d->parm[0]], p);
+
+ sprintf (name, "__builtin_gcn_%s", d->name);
+ gcn_builtin_decls[i]
+ = add_builtin_function (name, p, i, BUILT_IN_MD, NULL, NULL_TREE);
+
+ /* These builtins don't throw. */
+ TREE_NOTHROW (gcn_builtin_decls[i]) = 1;
+ }
+}
+
+static rtx
+gcn_expand_builtin_1 (tree exp, rtx target, rtx /*subtarget*/,
+ machine_mode /*mode*/, int ignore,
+ struct gcn_builtin_description *)
+{
+ tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
+ switch (DECL_FUNCTION_CODE (fndecl))
+ {
+ case GCN_BUILTIN_FLAT_LOAD_INT32:
+ {
+ if (ignore)
+ return target;
+ /*rtx exec = */
+ force_reg (DImode,
+ expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX, DImode,
+ EXPAND_NORMAL));
+ /*rtx ptr =*/
+ force_reg (V64DImode,
+ expand_expr (CALL_EXPR_ARG (exp, 1), NULL_RTX, V64DImode,
+ EXPAND_NORMAL));
+ /*emit_insn (gen_vector_flat_loadv64si
+ (target, gcn_gen_undef (V64SImode), ptr, exec));*/
+ return target;
+ }
+ case GCN_BUILTIN_FLAT_LOAD_PTR_INT32:
+ case GCN_BUILTIN_FLAT_LOAD_PTR_FLOAT:
+ {
+ if (ignore)
+ return target;
+ rtx exec
+ = force_reg (DImode,
+ expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX, DImode,
+ EXPAND_NORMAL));
+ rtx ptr
+ = force_reg (DImode,
+ expand_expr (CALL_EXPR_ARG (exp, 1), NULL_RTX, V64DImode,
+ EXPAND_NORMAL));
+ rtx offsets = force_reg (V64SImode,
+ expand_expr (CALL_EXPR_ARG (exp, 2), NULL_RTX, V64DImode,
+ EXPAND_NORMAL));
+ rtx addrs = gen_reg_rtx (V64DImode);
+ rtx tmp = gen_reg_rtx (V64SImode);
+ emit_insn (gen_ashlv64si3_vector (tmp, offsets,
+ gcn_vec_constant (V64SImode, 2),
+ exec, gcn_gen_undef (V64SImode)));
+ emit_insn (gen_addv64di3_zext_dup2 (addrs, tmp, ptr, exec,
+ gcn_gen_undef (V64DImode)));
+ rtx mem = gen_rtx_MEM (GET_MODE (target), addrs);
+ set_mem_addr_space (mem, ADDR_SPACE_FLAT);
+ /* FIXME: set attributes. */
+ emit_insn (gen_mov_with_exec (target, mem, exec));
+ return target;
+ }
+ case GCN_BUILTIN_FLAT_STORE_PTR_INT32:
+ case GCN_BUILTIN_FLAT_STORE_PTR_FLOAT:
+ {
+ rtx exec
+ = force_reg (DImode,
+ expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX, DImode,
+ EXPAND_NORMAL));
+ rtx ptr
+ = force_reg (DImode,
+ expand_expr (CALL_EXPR_ARG (exp, 1), NULL_RTX, V64DImode,
+ EXPAND_NORMAL));
+ rtx offsets = force_reg (V64SImode,
+ expand_expr (CALL_EXPR_ARG (exp, 2), NULL_RTX, V64DImode,
+ EXPAND_NORMAL));
+ enum machine_mode vmode = TYPE_MODE (TREE_TYPE (CALL_EXPR_ARG (exp,
+ 3)));
+ rtx val = force_reg (vmode,
+ expand_expr (CALL_EXPR_ARG (exp, 3), NULL_RTX, vmode,
+ EXPAND_NORMAL));
+ rtx addrs = gen_reg_rtx (V64DImode);
+ rtx tmp = gen_reg_rtx (V64SImode);
+ emit_insn (gen_ashlv64si3_vector (tmp, offsets,
+ gcn_vec_constant (V64SImode, 2),
+ exec, gcn_gen_undef (V64SImode)));
+ emit_insn (gen_addv64di3_zext_dup2 (addrs, tmp, ptr, exec,
+ gcn_gen_undef (V64DImode)));
+ rtx mem = gen_rtx_MEM (vmode, addrs);
+ set_mem_addr_space (mem, ADDR_SPACE_FLAT);
+ /* FIXME: set attributes. */
+ emit_insn (gen_mov_with_exec (mem, val, exec));
+ return target;
+ }
+ default:
+ gcc_unreachable ();
+ }
+}
+
+/* Expansion of simple arithmetic and bitwise binary operation builtins.  */
+
+static rtx
+gcn_expand_builtin_binop (tree exp, rtx target, rtx /*subtarget*/,
+ machine_mode /*mode*/, int ignore,
+ struct gcn_builtin_description *d)
+{
+ tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
+ int icode = d->icode;
+ if (ignore)
+ return target;
+
+ rtx exec = force_reg (DImode,
+ expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX, DImode,
+ EXPAND_NORMAL));
+
+ machine_mode m1 = insn_data[icode].operand[1].mode;
+ rtx arg1 = expand_expr (CALL_EXPR_ARG (exp, 1), NULL_RTX, m1,
+ EXPAND_NORMAL);
+ if (!insn_data[icode].operand[1].predicate (arg1, m1))
+ arg1 = force_reg (m1, arg1);
+
+ machine_mode m2 = insn_data[icode].operand[2].mode;
+ rtx arg2 = expand_expr (CALL_EXPR_ARG (exp, 2), NULL_RTX, m2,
+ EXPAND_NORMAL);
+ if (!insn_data[icode].operand[2].predicate (arg2, m2))
+ arg2 = force_reg (m2, arg2);
+
+ rtx arg_prev;
+ if (call_expr_nargs (exp) == 4)
+ {
+ machine_mode m_prev = insn_data[icode].operand[4].mode;
+ arg_prev = force_reg (m_prev,
+ expand_expr (CALL_EXPR_ARG (exp, 3), NULL_RTX,
+ m_prev, EXPAND_NORMAL));
+ }
+ else
+ arg_prev = gcn_gen_undef (GET_MODE (target));
+
+ rtx pat = GEN_FCN (icode) (target, arg1, arg2, exec, arg_prev);
+ emit_insn (pat);
+ return target;
+}
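+
+/* Illustrative use of a builtin generated by DEF_BUILTIN_BINOP_INT_FP (the
+   exact names come from gcn-builtins.def):
+
+     v64si c = __builtin_gcn_add_v64int (exec, a, b, prev);
+
+   operates on the lanes selected by EXEC and takes the remaining lanes
+   from PREV; the three-argument _unspec variant leaves them undefined.  */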
+
+/* Expand an expression EXP that calls a built-in function,
+ with result going to TARGET if that's convenient
+ (and in mode MODE if that's convenient).
+ SUBTARGET may be used as the target for computing one of EXP's operands.
+ IGNORE is nonzero if the value is to be ignored. */
+rtx
+gcn_expand_builtin (tree exp,
+ rtx target,
+ rtx subtarget,
+ machine_mode mode,
+ int ignore)
+{
+ tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
+ unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
+ struct gcn_builtin_description *d;
+
+ gcc_assert (fcode < GCN_BUILTIN_MAX);
+ d = &gcn_builtins[fcode];
+
+ if (d->type == B_UNIMPLEMENTED)
+ sorry ("Builtin not implemented");
+
+ return d->expander (exp, target, subtarget, mode, ignore, d);
+}
+
+
+/* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
+
+static bool
+gcn_class_likely_spilled_p (reg_class_t rclass)
+{
+ return rclass == EXEC_MASK_REG;
+}
+
+/* Implement TARGET_PREFERRED_RELOAD_CLASS.
+
+   All classes can load zeros directly.  Reloads of the EXEC mask go
+   through SGPR_EXEC_REGS.  */
+
+static reg_class_t
+gcn_preferred_reload_class (rtx x, reg_class_t regclass)
+{
+ machine_mode mode = GET_MODE (x);
+
+ /* We're only allowed to return a subclass of CLASS. Many of the
+ following checks fail for NO_REGS, so eliminate that early. */
+ if (regclass == NO_REGS)
+ return NO_REGS;
+
+ /* All classes can load zeros. */
+ if (x == CONST0_RTX (mode))
+ return regclass;
+
+ if (regclass == EXEC_MASK_REG)
+ return SGPR_EXEC_REGS;
+
+ return regclass;
+}
+
+unsigned
+gcn_frame_pointer_regnum ()
+{
+ if (cfun && cfun->machine)
+ return cfun->machine->args.reg[PRIVATE_SEGMENT_WAVE_OFFSET_ARG];
+ return 0;
+}
+
+bool
+gcn_can_eliminate_p (int /*from_reg*/, int to_reg)
+{
+ return ((unsigned) to_reg == HARD_FRAME_POINTER_REGNUM);
+}
+
+/* Memory can only be read and written through vector registers; be sure
+   to tell IRA that reloads of other classes need a VGPR intermediary.  */
+
+static reg_class_t
+gcn_secondary_reload (bool, rtx, reg_class_t rclass,
+ machine_mode, secondary_reload_info *)
+{
+ if (rclass == VGPR_REGS)
+ return NO_REGS;
+ return VGPR_REGS;
+}
+
+/* Update register usage after having seen the compiler flags. */
+
+static void
+gcn_conditional_register_usage (void)
+{
+  int i;
+  for (i = 0; i < 16; i++)
+    fixed_regs[i] = (!cfun || !cfun->machine) ? 1 : 0;
+ if (!cfun || !cfun->machine)
+ return;
+ if (cfun->machine->args.reg[PRIVATE_SEGMENT_WAVE_OFFSET_ARG] >= 0)
+ fixed_regs[cfun->machine->args.reg[PRIVATE_SEGMENT_WAVE_OFFSET_ARG]] = 1;
+ if (cfun->machine->args.reg[PRIVATE_SEGMENT_BUFFER_ARG] >= 0)
+ {
+ fixed_regs[cfun->machine->args.reg[PRIVATE_SEGMENT_BUFFER_ARG]] = 1;
+ fixed_regs[cfun->machine->args.reg[PRIVATE_SEGMENT_BUFFER_ARG] + 1] = 1;
+ fixed_regs[cfun->machine->args.reg[PRIVATE_SEGMENT_BUFFER_ARG] + 2] = 1;
+ fixed_regs[cfun->machine->args.reg[PRIVATE_SEGMENT_BUFFER_ARG] + 3] = 1;
+ }
+}
+/* TARGET overrides. */
+#undef TARGET_FUNCTION_VALUE
+#define TARGET_FUNCTION_VALUE gcn_function_value
+#undef TARGET_STATIC_CHAIN
+#define TARGET_STATIC_CHAIN gcn_static_chain
+#undef TARGET_FUNCTION_VALUE_REGNO_P
+#define TARGET_FUNCTION_VALUE_REGNO_P gcn_function_value_regno_p
+#undef TARGET_FUNCTION_ARG_ADVANCE
+#define TARGET_FUNCTION_ARG_ADVANCE gcn_function_arg_advance
+#undef TARGET_FUNCTION_ARG
+#define TARGET_FUNCTION_ARG gcn_function_arg
+#undef TARGET_ASM_FILE_START
+#define TARGET_ASM_FILE_START output_file_start
+#undef TARGET_LRA_P
+#define TARGET_LRA_P hook_bool_void_true
+#undef TARGET_REGISTER_MOVE_COST
+#define TARGET_REGISTER_MOVE_COST gcn_register_move_cost
+#undef TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P
+#define TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P \
+ gcn_small_register_classes_for_mode_p
+#undef TARGET_RTX_COSTS
+#define TARGET_RTX_COSTS gcn_rtx_costs
+#undef TARGET_MEMORY_MOVE_COST
+#define TARGET_MEMORY_MOVE_COST gcn_memory_move_cost
+#undef TARGET_SPILL_CLASS
+#define TARGET_SPILL_CLASS gcn_spill_class
+#undef TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS
+#define TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS \
+ gcn_ira_change_pseudo_allocno_class
+#undef TARGET_SCALAR_MODE_SUPPORTED_P
+#define TARGET_SCALAR_MODE_SUPPORTED_P gcn_scalar_mode_supported_p
+#undef TARGET_VECTOR_MODE_SUPPORTED_P
+#define TARGET_VECTOR_MODE_SUPPORTED_P gcn_vector_mode_supported_p
+#undef TARGET_CLASS_MAX_NREGS
+#define TARGET_CLASS_MAX_NREGS gcn_class_max_nregs
+#undef TARGET_OPTION_OVERRIDE
+#define TARGET_OPTION_OVERRIDE gcn_option_override
+#undef TARGET_EXTRA_LIVE_ON_ENTRY
+#define TARGET_EXTRA_LIVE_ON_ENTRY gcn_live_on_entry
+
+#undef TARGET_ATTRIBUTE_TABLE
+#define TARGET_ATTRIBUTE_TABLE gcn_attribute_table
+
+#undef TARGET_ADDR_SPACE_POINTER_MODE
+#define TARGET_ADDR_SPACE_POINTER_MODE gcn_addr_space_pointer_mode
+
+#undef TARGET_ADDR_SPACE_ADDRESS_MODE
+#define TARGET_ADDR_SPACE_ADDRESS_MODE gcn_addr_space_address_mode
+
+#undef TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P
+#define TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P \
+ gcn_addr_space_legitimate_address_p
+
+#undef TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS
+#define TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS gcn_addr_space_legitimize_address
+
+#undef TARGET_ADDR_SPACE_SUBSET_P
+#define TARGET_ADDR_SPACE_SUBSET_P gcn_addr_space_subset_p
+
+#undef TARGET_ADDR_SPACE_CONVERT
+#define TARGET_ADDR_SPACE_CONVERT gcn_addr_space_convert
+
+#undef TARGET_INIT_BUILTINS
+#define TARGET_INIT_BUILTINS gcn_init_builtins
+#undef TARGET_EXPAND_BUILTIN
+#define TARGET_EXPAND_BUILTIN gcn_expand_builtin
+#undef TARGET_BUILTIN_DECL
+#define TARGET_BUILTIN_DECL gcn_builtin_decl
+/*#undef TARGET_CLASS_LIKELY_SPILLED_P
+#define TARGET_CLASS_LIKELY_SPILLED_P gcn_class_likely_spilled_p
+#undef TARGET_PREFERRED_RELOAD_CLASS
+#define TARGET_PREFERRED_RELOAD_CLASS gcn_preferred_reload_class
+#undef TARGET_CAN_ELIMINATE
+#define TARGET_CAN_ELIMINATE gcn_can_eliminate_p
+#undef TARGET_SECONDARY_RELOAD
+#define TARGET_SECONDARY_RELOAD gcn_secondary_reload*/
+#undef TARGET_CONDITIONAL_REGISTER_USAGE
+#define TARGET_CONDITIONAL_REGISTER_USAGE gcn_conditional_register_usage
+
+struct gcc_target targetm = TARGET_INITIALIZER;
+
+#include "gt-gcn.h"
diff --git a/gcc/config/gcn/gcn.h b/gcc/config/gcn/gcn.h
new file mode 100644
index 00000000000..3b41095542c
--- /dev/null
+++ b/gcc/config/gcn/gcn.h
@@ -0,0 +1,718 @@
+/* Copyright (C) 2016-2017 Free Software Foundation, Inc.
+
+ This file is free software; you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your option)
+ any later version.
+
+ This file is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#include "config/gcn/gcn-opts.h"
+
+
+/* FIXME */
+#define TARGET_CPU_CPP_BUILTINS()
+
+/* Temporarily disable libgcc until one actually exists. */
+#undef LIBGCC_SPEC
+#define LIBGCC_SPEC ""
+
+/* Use LLVM assembler options. */
+#undef ASM_SPEC
+#define ASM_SPEC "-triple=amdgcn--amdhsa %{march=*:-mcpu=%*} -filetype=obj"
+
+#undef LINK_SPEC
+#define LINK_SPEC ""
+
+/* Support for a compile-time default architecture and tuning. The rules are:
+ --with-arch is ignored if -march is specified.
+ --with-tune is ignored if -mtune is specified. */
+#define OPTION_DEFAULT_SPECS \
+ {"arch", "%{!march=*:-march=%(VALUE)}" }, \
+ {"tune", "%{!mtune=*:-mtune=%(VALUE)}" }
+
+/* Default target_flags if no switches specified. */
+#ifndef TARGET_DEFAULT
+#define TARGET_DEFAULT 0
+#endif
+
+
+/* Storage Layout */
+
+#define BITS_BIG_ENDIAN 0
+
+#define BYTES_BIG_ENDIAN 0
+
+#define WORDS_BIG_ENDIAN 0
+
+#define BITS_PER_WORD 32
+
+#define UNITS_PER_WORD (BITS_PER_WORD/BITS_PER_UNIT)
+
+#ifndef LIBGCC2_UNITS_PER_WORD
+#define LIBGCC2_UNITS_PER_WORD 8
+#endif
+
+#define TARGET_64BIT 1
+
+/* Scratch memory is addressed by buffer memory accesses, which are 32-bit.  */
+#define POINTER_SIZE 32
+
+#define PARM_BOUNDARY 128
+
+/* FIXME */
+#define STACK_BOUNDARY 128
+
+#define FUNCTION_BOUNDARY 32
+
+#define BIGGEST_ALIGNMENT 512
+
+/* FIXME */
+#define DATA_ALIGNMENT(TYPE,ALIGN) ((ALIGN) > 128 ? (ALIGN) : 128)
+#define CONSTANT_ALIGNMENT(TYPE,ALIGN) ((ALIGN) > 128 ? (ALIGN) : 128)
+#define LOCAL_ALIGNMENT(TYPE,ALIGN) ((ALIGN) > 128 ? (ALIGN) : 128)
+
+/* FIXME */
+#define EMPTY_FIELD_BOUNDARY 32
+
+#define STRICT_ALIGNMENT 1
+
+/* FIXME */
+#define PCC_BITFIELD_TYPE_MATTERS 1
+
+/* FIXME */
+#define MAX_FIXED_MODE_SIZE 128
+
+#define STACK_SIZE_MODE DImode
+
+
+/* Type Layout: match what x86-64 does. */
+
+#define INT_TYPE_SIZE 32
+
+#define LONG_TYPE_SIZE (TARGET_64BIT ? 64 : 32)
+
+#define LONG_LONG_TYPE_SIZE 64
+
+#define FLOAT_TYPE_SIZE 32
+
+#define DOUBLE_TYPE_SIZE 64
+
+/* FIXME: software emulated? */
+#define LONG_DOUBLE_TYPE_SIZE 64
+
+#define DEFAULT_SIGNED_CHAR 1
+
+/* Register Basics */
+#define FIRST_SGPR_REG 0
+#define LAST_SGPR_REG 101
+
+#define FLAT_SCRATCH_REG 102
+#define FLAT_SCRATCH_LO_REG 102
+#define FLAT_SCRATCH_HI_REG 103
+#define XNACK_MASK_REG 104
+#define XNACK_MASK_LO_REG 104
+#define XNACK_MASK_HI_REG 105
+#define VCC_LO_REG 106
+#define VCC_HI_REG 107
+#define VCCZ_REG 108
+#define TBA_REG 109
+#define TBA_LO_REG 109
+#define TBA_HI_REG 110
+#define TMA_REG 111
+#define TMA_LO_REG 111
+#define TMA_HI_REG 112
+#define TTMP0_REG 113
+#define TTMP11_REG 124
+#define M0_REG 125
+#define EXEC_REG 126
+#define EXEC_LO_REG 126
+#define EXEC_HI_REG 127
+#define EXECZ_REG 128
+#define SCC_REG 129
+/* 130-159 are reserved.  */
+
+#define FIRST_VGPR_REG 160
+#define LAST_VGPR_REG 415
+
+#define VGPR_REGNO(N) ((N)+FIRST_VGPR_REG)
+#define SGPR_REGNO(N) ((N)+FIRST_SGPR_REG)
+
+#define SGPR_OR_VGPR_REGNO_P(N) (SGPR_REGNO_P (N) || VGPR_REGNO_P (N))
+#define SGPR_REGNO_P(N) ((N) <= LAST_SGPR_REG)
+#define VGPR_REGNO_P(N) ((N)>=FIRST_VGPR_REG && (N) <= LAST_VGPR_REG)
+#define CC_REG_P(X) (REG_P (X) && CC_REGNO_P (REGNO (X)))
+#define CC_REGNO_P(X) ((X) == SCC_REG || (X) == VCC_LO_REG)
+
+#define FIRST_PSEUDO_REGISTER 416
+
+/* s[16:17] are fixed for exec hack moves.
+   Both registers could probably be eliminated and passed to the register
+   allocator for general use with some extra work.  */
+#define FIXED_REGISTERS { \
+ /* Scalars. */ \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, \
+ /* Special regs and padding. */ \
+/* flat xnack vcc tba tma ttmp */ \
+ 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+/* m0 exec scc */ \
+ 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+  /* VGPRs */ \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+}
+
+#define CALL_USED_REGISTERS { \
+ /* Scalars. */ \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ /* Special regs and padding. */ \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+  /* VGPRs */ \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+}
+
+
+/* Values in Registers */
+
+#define HARD_REGNO_NREGS(REGNO, MODE) gcn_hard_regno_nregs (REGNO, MODE)
+#define HARD_REGNO_MODE_OK(REGNO, MODE) gcn_hard_regno_mode_ok (REGNO, MODE)
+#define CANNOT_CHANGE_MODE_CLASS(FROM,TO,CLASS) gcn_cannot_change_mode_class (FROM, TO, CLASS)
+
+#define MODES_TIEABLE_P(MODE1, MODE2) \
+ (GET_MODE_BITSIZE (MODE1) <= MAX_FIXED_MODE_SIZE \
+ && GET_MODE_BITSIZE (MODE2) <= MAX_FIXED_MODE_SIZE)
+
+/* Register Classes */
+
+enum reg_class {
+ NO_REGS,
+
+ /* SCC */
+ SCC_CONDITIONAL_REG,
+
+ /* VCCZ */
+ VCCZ_CONDITIONAL_REG,
+
+ /* VCC */
+ VCC_CONDITIONAL_REG,
+
+ /* EXECZ */
+ EXECZ_CONDITIONAL_REG,
+
+ /* SCC VCCZ EXECZ */
+ ALL_CONDITIONAL_REGS,
+
+ /* EXEC */
+ EXEC_MASK_REG,
+
+ /* SGPR0-101 */
+ SGPR_REGS,
+
+ /* SGPR0-101 EXEC_LO/EXEC_HI */
+ SGPR_EXEC_REGS,
+
+ /* SGPR0-101, VCC LO/HI, TBA LO/HI, TMA LO/HI, TTMP0-11, M0, EXEC LO/HI,
+ VCCZ, EXECZ, SCC
+ FIXME: Maybe manual has bug and FLAT_SCRATCH is OK. */
+ SGPR_VOP3A_SRC_REGS,
+
+ /* SGPR0-101, FLAT_SCRATCH_LO/HI, XNACK_MASK_LO/HI, VCC LO/HI, TBA LO/HI
+ TMA LO/HI, TTMP0-11 */
+ SGPR_MEM_SRC_REGS,
+
+ /* SGPR0-101, FLAT_SCRATCH_LO/HI, XNACK_MASK_LO/HI, VCC LO/HI, TBA LO/HI
+ TMA LO/HI, TTMP0-11, M0, EXEC LO/HI */
+ SGPR_DST_REGS,
+
+ /* SGPR0-101, FLAT_SCRATCH_LO/HI, XNACK_MASK_LO/HI, VCC LO/HI, TBA LO/HI
+ TMA LO/HI, TTMP0-11 */
+ SGPR_SRC_REGS,
+ GENERAL_REGS,
+ VGPR_REGS,
+ SRCDST_REGS,
+ ALL_REGS,
+ LIM_REG_CLASSES
+};
+
+#define N_REG_CLASSES (int) LIM_REG_CLASSES
+
+#define REG_CLASS_NAMES \
+{ "NO_REGS", \
+ "SCC_CONDITIONAL_REG", \
+ "VCCZ_CONDITIONAL_REG", \
+ "VCC_CONDITIONAL_REG", \
+ "EXECZ_CONDITIONAL_REG", \
+ "ALL_CONDITIONAL_REGS", \
+ "EXEC_MASK_REG", \
+ "SGPR_REGS", \
+ "SGPR_EXEC_REGS", \
+ "SGPR_VOP3A_SRC_REGS", \
+ "SGPR_MEM_SRC_REGS", \
+ "SGPR_SRC_REGS", \
+ "SGPR_DST_REGS", \
+ "GENERAL_REGS", \
+ "VGPR_REGS", \
+ "SRCDST_REGS", \
+ "ALL_REGS" \
+}
+
+#define NAMED_REG_MASK(N) (1<<((N)-3*32))
+#define NAMED_REG_MASK2(N) (1<<((N)-4*32))
+
+#define REG_CLASS_CONTENTS { \
+ {0, 0, 0, 0, \
+ 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0}, /* no regs */ \
+ {0, 0, 0, 0, \
+ NAMED_REG_MASK2 (SCC_REG), 0, 0, 0, \
+ 0, 0, 0, 0}, /* scc reg */ \
+ {0, 0, 0, \
+ NAMED_REG_MASK (VCCZ_REG), 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0}, /* vccz reg */ \
+ {0, 0, 0, \
+ NAMED_REG_MASK (VCC_LO_REG)|NAMED_REG_MASK (VCC_HI_REG), 0, 0, 0, 0,\
+   0, 0, 0, 0, 0}, /* vcc reg */ \
+ {0, 0, 0, 0, \
+ NAMED_REG_MASK2 (EXECZ_REG), 0, 0, 0, \
+ 0, 0, 0, 0}, /* execz reg */ \
+ {0, 0, 0, \
+ NAMED_REG_MASK (VCCZ_REG), \
+ NAMED_REG_MASK (EXECZ_REG) | NAMED_REG_MASK2 (SCC_REG), \
+ 0, 0, 0, \
+ 0, 0, 0, 0, 0}, /* all conditional regs */ \
+ {0, 0, 0, \
+ NAMED_REG_MASK (EXEC_LO_REG) | NAMED_REG_MASK (EXEC_HI_REG),\
+ 0, \
+ 0, 0, 0, \
+ 0, 0, 0, 0, 0}, /* exec mask reg */ \
+ {0xffffffff, 0xffffffff, 0xffffffff, 0xf1, \
+ 0, 0, 0, 0, \
+ 0, 0, 0, 0, \
+ 0}, /* sgpr regs*/ \
+ {0xffffffff, 0xffffffff, 0xffffffff, 0xf1 \
+ | NAMED_REG_MASK (EXEC_LO_REG) | NAMED_REG_MASK (EXEC_HI_REG),\
+ 0, 0, 0, 0, \
+ 0, 0, 0, 0, \
+ 0}, /* sgpr exec regs*/ \
+ {0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff \
+ -NAMED_REG_MASK (FLAT_SCRATCH_LO_REG) \
+ -NAMED_REG_MASK (FLAT_SCRATCH_HI_REG) \
+ -NAMED_REG_MASK (XNACK_MASK_LO_REG) \
+ -NAMED_REG_MASK (XNACK_MASK_HI_REG), \
+ NAMED_REG_MASK (EXECZ_REG) | NAMED_REG_MASK2 (SCC_REG), \
+ 0, 0, 0, \
+ 0, 0, 0, 0, \
+   0}, /* sgpr vop3a regs*/\
+ {0xffffffff, 0xffffffff, 0xffffffff, \
+ 0xffffffff-NAMED_REG_MASK (VCCZ_REG)-NAMED_REG_MASK (M0_REG)\
+ -NAMED_REG_MASK (EXEC_LO_REG)-NAMED_REG_MASK (EXEC_HI_REG),\
+ 0, 0, 0, 0, \
+ 0, 0, 0, 0, \
+ 0}, /* sgpr src mem regs*/\
+ {0xffffffff, 0xffffffff, 0xffffffff, \
+ 0xffffffff-NAMED_REG_MASK (VCCZ_REG), \
+ 0, 0, 0, 0, \
+ 0, 0, 0, 0, \
+ 0}, /* sgpr dst regs*/\
+ {0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, \
+ NAMED_REG_MASK2 (EXECZ_REG) | NAMED_REG_MASK2 (SCC_REG), \
+ 0, 0, 0, \
+ 0, 0, 0, 0, \
+ 0}, /* sgpr src regs*/\
+ {0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, \
+ 0xffffffff, 0, 0, 0, \
+ 0, 0, 0, 0, \
+ 0}, /* general regs*/ \
+ {0, 0, 0, 0, \
+ 0, 0xffffffff, 0xffffffff, 0xffffffff, \
+ 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, \
+ 0xffffffff}, /* vector regs */ \
+ {0xffffffff, 0xffffffff, 0xffffffff, \
+ 0xffffffff-NAMED_REG_MASK (VCCZ_REG), \
+ 0, 0xffffffff, 0xffffffff, 0xffffffff, \
+ 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, \
+ 0xffffffff}, /* srcdst regs*/\
+ {0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, \
+ 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, \
+ 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, \
+ 0xffffffff}} /* all regs*/
+
+#define REGNO_REG_CLASS(REGNO) gcn_regno_reg_class (REGNO)
+#define MODE_CODE_BASE_REG_CLASS(MODE, AS, OUTER, INDEX) \
+ gcn_mode_code_base_reg_class (MODE, AS, OUTER, INDEX)
+#define REGNO_MODE_CODE_OK_FOR_BASE_P(NUM, MODE, AS, OUTER, INDEX) \
+ gcn_regno_mode_code_ok_for_base_p (NUM, MODE, AS, OUTER, INDEX)
+#define INDEX_REG_CLASS VGPR_REGS
+#define REGNO_OK_FOR_INDEX_P(regno) regno_ok_for_index_p (regno)
+
+/* Forward declaration so the following compiles. */
+extern short *reg_renumber;
+
+#define INT_REG_OK_FOR_INDEX_P(X, STRICT) \
+ ((!(STRICT) || REGNO_OK_FOR_INDEX_P (REGNO (X))))
+#define INT_REG_OK_FOR_BASE_P(X, STRICT) \
+ ((!(STRICT) || REGNO_OK_FOR_BASE_P (REGNO (X))))
+
+/* Address spaces. */
+enum gcn_address_spaces {
+ ADDR_SPACE_SCRATCH = 0,
+ ADDR_SPACE_FLAT,
+ ADDR_SPACE_SCALAR_FLAT,
+ ADDR_SPACE_LDS,
+ ADDR_SPACE_GDS
+};
+#define REGISTER_TARGET_PRAGMAS() do {					\
+  c_register_addr_space ("__flat", ADDR_SPACE_FLAT);			\
+  c_register_addr_space ("__scalar_flat", ADDR_SPACE_SCALAR_FLAT);	\
+  c_register_addr_space ("__lds", ADDR_SPACE_LDS);			\
+  c_register_addr_space ("__gds", ADDR_SPACE_GDS);			\
+} while (0)
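+
+/* These can then be used as address space qualifiers in C code, e.g.
+   (illustrative):
+
+     __lds int counter;     -- an object placed in local data share
+     __flat float *p;       -- a pointer into the flat address space  */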
+
+/* Sections */
+#define GLOBAL_ASM_OP "\t.globl\t"
+
+/* File Framework */
+#undef ASM_DECLARE_FUNCTION_NAME
+#define ASM_DECLARE_FUNCTION_NAME(FILE, NAME, DECL) \
+ gcn_hsa_declare_function_name ((FILE), (NAME), (DECL))
+
+#define ASM_APP_ON ""
+
+#define ASM_APP_OFF ""
+
+
+/* Uninitialized Data */
+#define ASM_OUTPUT_COMMON(FILE, NAME, SIZE, ROUNDED) \
+( fputs (".comm ", (FILE)), \
+ assemble_name ((FILE), (NAME)), \
+ fprintf ((FILE), ",%d\n", (ROUNDED)))
+
+#define ASM_OUTPUT_LOCAL(FILE, NAME, SIZE, ROUNDED) \
+( fputs (".lcomm ", (FILE)), \
+ assemble_name ((FILE), (NAME)), \
+ fprintf ((FILE), ",%d\n", (ROUNDED)))
+
+
+/* Label Output */
+#define ASM_OUTPUT_LABEL(FILE,NAME) \
+ do { assemble_name (FILE, NAME); fputs (":\n", FILE); } while (0)
+
+#define ASM_OUTPUT_LABELREF(FILE, NAME) \
+ asm_fprintf (FILE, "%U%s", default_strip_name_encoding (NAME))
+
+#define ASM_OUTPUT_SYMBOL_REF(FILE, X) \
+ do \
+ { \
+ tree decl; \
+ assemble_name (FILE, XSTR ((X), 0)); \
+ if ((decl = SYMBOL_REF_DECL ((X))) != 0 \
+ && TREE_CODE (decl) == VAR_DECL \
+ && TYPE_ADDR_SPACE (TREE_TYPE (decl))) \
+ fputs ("@ppu", FILE); \
+ } while (0)
+
+
+/* Instruction Output */
+#define REGISTER_NAMES \
+ {"s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7", "s8", "s9", \
+ "s10", "s11", "s12", "s13", "s14", "s15", "s16", "s17", "s18", "s19", \
+ "s20", "s21", "s22", "s23", "s24", "s25", "s26", "s27", "s28", "s29", \
+ "s30", "s31", "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", \
+ "s40", "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49", \
+ "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58", "s59", \
+ "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67", "s68", "s69", \
+ "s70", "s71", "s72", "s73", "s74", "s75", "s76", "s77", "s78", "s79", \
+ "s80", "s81", "s82", "s83", "s84", "s85", "s86", "s87", "s88", "s89", \
+ "s90", "s91", "s92", "s93", "s94", "s95", "s96", "s97", "s98", "s99", \
+ "s100", "s101", \
+ "flat_scratch_lo", "flat_scratch_hi", "xnack_mask_lo", "xnack_mask_hi", \
+ "vcc_lo", "vcc_hi", "vccz", "tba_lo", "tba_hi", "tma_lo", "tma_hi", \
+ "ttmp0", "ttmp1", "ttmp2", "ttmp3", "ttmp4", "ttmp5", "ttmp6", "ttmp7", \
+ "ttmp8", "ttmp9", "ttmp10", "ttmp11", "m0", "exec_lo", "exec_hi", "execz", \
+ "scc", "res130", "res131", "res132", "res133", "res134", "res135", \
+ "res136", "res137", "res138", "res139", \
+ "res140", "res141", "res142", "res143", "res144", "res145", "res146", \
+ "res147", "res148", "res149", \
+ "res150", "res151", "res152", "res153", "res154", "res155", "res156", \
+ "res157", "res158", "res159", \
+ "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", \
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", \
+ "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", \
+ "v30", "v31", "v32", "v33", "v34", "v35", "v36", "v37", "v38", "v39", \
+ "v40", "v41", "v42", "v43", "v44", "v45", "v46", "v47", "v48", "v49", \
+ "v50", "v51", "v52", "v53", "v54", "v55", "v56", "v57", "v58", "v59", \
+ "v60", "v61", "v62", "v63", "v64", "v65", "v66", "v67", "v68", "v69", \
+ "v70", "v71", "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", \
+ "v80", "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89", \
+ "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98", "v99", \
+ "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107", "v108", \
+ "v109", \
+ "v110", "v111", "v112", "v113", "v114", "v115", "v116", "v117", "v118", \
+ "v119", \
+ "v120", "v121", "v122", "v123", "v124", "v125", "v126", "v127", "v128", \
+ "v129", \
+ "v130", "v131", "v132", "v133", "v134", "v135", "v136", "v137", "v138", \
+ "v139", \
+ "v140", "v141", "v142", "v143", "v144", "v145", "v146", "v147", "v148", \
+ "v149", \
+ "v150", "v151", "v152", "v153", "v154", "v155", "v156", "v157", "v158", \
+ "v159", \
+ "v160", "v161", "v162", "v163", "v164", "v165", "v166", "v167", "v168", \
+ "v169", \
+ "v170", "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", \
+ "v179", \
+ "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188", \
+ "v189", \
+ "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197", "v198", \
+ "v199", \
+ "v200", "v201", "v202", "v203", "v204", "v205", "v206", "v207", "v208", \
+ "v209", \
+ "v210", "v211", "v212", "v213", "v214", "v215", "v216", "v217", "v218", \
+ "v219", \
+ "v220", "v221", "v222", "v223", "v224", "v225", "v226", "v227", "v228", \
+ "v229", \
+ "v230", "v231", "v232", "v233", "v234", "v235", "v236", "v237", "v238", \
+ "v239", \
+ "v240", "v241", "v242", "v243", "v244", "v245", "v246", "v247", "v248", \
+ "v249", \
+ "v250", "v251", "v252", "v253", "v254", "v255", \
+ }
+
+#define PRINT_OPERAND(FILE, X, CODE) print_operand(FILE, X, CODE)
+
+#define PRINT_OPERAND_ADDRESS(FILE, ADDR) print_operand_address (FILE, ADDR)
+
+#define LOCAL_LABEL_PREFIX "."
+
+#define USER_LABEL_PREFIX ""
+
+#define ASM_COMMENT_START "#"
+
+
+/* Dispatch Tables */
+
+#define ASM_OUTPUT_ADDR_DIFF_ELT(FILE, BODY, VALUE, REL) \
+ fprintf (FILE, "\t.word .L%d-.L%d\n", VALUE, REL)
+
+#define ASM_OUTPUT_ADDR_VEC_ELT(FILE, VALUE) \
+ fprintf (FILE, "\t.word .L%d\n", VALUE)
+
+
+/* Alignment Output */
+
+#define ASM_OUTPUT_ALIGN(FILE,LOG) \
+  do { if ((LOG) != 0) fprintf (FILE, "\t.align\t%d\n", 1 << (LOG)); } while (0)
+
+
+/* Misc */
+
+#define CASE_VECTOR_MODE DImode
+
+#define Pmode SImode
+
+#define FUNCTION_MODE QImode
+
+/* Frame Registers, and other registers */
+
+#define STACK_POINTER_REGNUM FLAT_SCRATCH_REG
+
+/* FIXME: The following declaration is also in gcn-protos.h.  Either remove it
+   from there or from here if it is not necessary in the following macro.  */
+extern unsigned gcn_frame_pointer_regnum ();
+/* FIXME. */
+#define HARD_FRAME_POINTER_REGNUM gcn_frame_pointer_regnum ()
+
+#define FRAME_POINTER_REGNUM FLAT_SCRATCH_REG
+
+#define HARD_FRAME_POINTER_IS_ARG_POINTER false
+#define HARD_FRAME_POINTER_IS_FRAME_POINTER false
+/* There is no arg pointer.  Just pick a fixed register that does not
+   interfere with anything.  */
+#define ARG_POINTER_REGNUM FLAT_SCRATCH_REG
+/* FIXME. */
+#define FUNCTION_ARG_REGNO_P(N) 0
+
+/* Frame Layout all FIXME */
+
+#define STACK_GROWS_DOWNWARD 0
+
+#define FRAME_GROWS_DOWNWARD 0
+
+#define STARTING_FRAME_OFFSET (0)
+
+#define STACK_POINTER_OFFSET 32
+
+#define FIRST_PARM_OFFSET(FNDECL) (0)
+
+#define DYNAMIC_CHAIN_ADDRESS(FP) plus_constant (Pmode, (FP), -16)
+
+#define INITIAL_FRAME_POINTER_OFFSET(N) (N)
+
+/* Register Arguments */
+
+#define GCN_KERNEL_ARG_TYPES 19
+
+struct GTY(()) gcn_kernel_args
+{
+ long requested;
+ int reg[GCN_KERNEL_ARG_TYPES];
+ int order[GCN_KERNEL_ARG_TYPES];
+ int nargs, nsgprs;
+};
+
+typedef struct gcn_args {
+ struct gcn_kernel_args args;
+ int num;
+ int offset;
+ int alignment;
+} CUMULATIVE_ARGS;
+
+#define INIT_CUMULATIVE_ARGS(CUM,FNTYPE,LIBNAME,FNDECL,N_NAMED_ARGS) \
+ gcn_init_cumulative_args (&(CUM), (FNTYPE), (LIBNAME), (FNDECL), \
+ (N_NAMED_ARGS) != -1)
+
+/* Address spaces. */
+
+#define MAX_REGS_PER_ADDRESS 2
+
+/* Profiling */
+
+#define FUNCTION_PROFILER(FILE, LABELNO)
+
+#define NO_PROFILE_COUNTERS 1
+
+#define PROFILE_BEFORE_PROLOGUE 0
+
+/* Trampolines */
+/*FIXME*/
+#define TRAMPOLINE_SIZE (65)
+/*FIXME*/
+#define TRAMPOLINE_ALIGNMENT 64
+
+/* Misc */
+
+#define MOVE_MAX 16
+
+#define TRULY_NOOP_TRUNCATION(OUTPREC, INPREC) ((INPREC) <= 32 \
+ && (OUTPREC) <= (INPREC))
+
+struct GTY(()) machine_function
+{
+ /* Register holding default value of EXEC. */
+ rtx exec_reg;
+ bool exec_reg_init;
+ struct gcn_kernel_args args;
+ int kernarg_segment_alignment;
+ int kernarg_segment_byte_size;
+};
+
+/* Definitions for register eliminations.
+
+ This is an array of structures. Each structure initializes one pair
+ of eliminable registers. The "from" register number is given first,
+ followed by "to". Eliminations of the same "from" register are listed
+ in order of preference.
+
+   Here the frame pointer is eliminated in favour of the hard frame
+   pointer, which is one of the first 16 SGPRs, selected according to
+   which kernel arguments were requested (see gcn_frame_pointer_regnum).  */
+
+#define ELIMINABLE_REGS \
+{{ FRAME_POINTER_REGNUM, 0}, \
+ { FRAME_POINTER_REGNUM, 1}, \
+ { FRAME_POINTER_REGNUM, 2}, \
+ { FRAME_POINTER_REGNUM, 3}, \
+ { FRAME_POINTER_REGNUM, 4}, \
+ { FRAME_POINTER_REGNUM, 5}, \
+ { FRAME_POINTER_REGNUM, 6}, \
+ { FRAME_POINTER_REGNUM, 7}, \
+ { FRAME_POINTER_REGNUM, 8}, \
+ { FRAME_POINTER_REGNUM, 9}, \
+ { FRAME_POINTER_REGNUM, 10}, \
+ { FRAME_POINTER_REGNUM, 11}, \
+ { FRAME_POINTER_REGNUM, 12}, \
+ { FRAME_POINTER_REGNUM, 13}, \
+ { FRAME_POINTER_REGNUM, 14}, \
+ { FRAME_POINTER_REGNUM, 15}} \
+
+/* Define the offset between two registers, one to be eliminated, and the other
+ its replacement, at the start of a routine. */
+
+#define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET) ((OFFSET) = 0)
+
+#define SLOW_BYTE_ACCESS 0
+
+/* Define this macro if it is advisable to hold scalars in registers
+ in a wider mode than that declared by the program. In such cases,
+ the value is constrained to be within the bounds of the declared
+ type, but kept valid in the wider mode. The signedness of the
+ extension may differ from that of the type. */
+
+#define PROMOTE_MODE(MODE,UNSIGNEDP,TYPE) \
+ if (GET_MODE_CLASS (MODE) == MODE_INT \
+ && (TYPE == NULL || TREE_CODE (TYPE) != VECTOR_TYPE) \
+ && GET_MODE_SIZE (MODE) < UNITS_PER_WORD) \
+ { \
+ (MODE) = SImode; \
+ }
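+
+/* For example, under PROMOTE_MODE above, a QImode or HImode scalar such as
+   a 'short' local variable is held and operated on in an SImode register,
+   and is only truncated back to its declared width when stored.  */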
diff --git a/gcc/config/gcn/gcn.md b/gcc/config/gcn/gcn.md
new file mode 100644
index 00000000000..b9d8a906702
--- /dev/null
+++ b/gcc/config/gcn/gcn.md
@@ -0,0 +1,1068 @@
+;; Copyright (C) 2016-2017 Free Software Foundation, Inc.
+
+;; This file is free software; you can redistribute it and/or modify it under
+;; the terms of the GNU General Public License as published by the Free
+;; Software Foundation; either version 3 of the License, or (at your option)
+;; any later version.
+
+;; This file is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+;; for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;;- See file "rtl.def" for documentation on define_insn, match_*, et. al.
+
+;; -------------------------------------------------------------------------
+;; Constants and enums
+;; -------------------------------------------------------------------------
+
+; Named registers
+(define_constants
+ [(FIRST_SGPR_REG 0)
+ (LAST_SGPR_REG 101)
+ (FLAT_SCRATCH_REG 102)
+ (FLAT_SCRATCH_LO_REG 102)
+ (FLAT_SCRATCH_HI_REG 103)
+ (XNACK_MASK_REG 104)
+ (XNACK_MASK_LO_REG 104)
+ (XNACK_MASK_HI_REG 105)
+ (VCC_REG 106)
+ (VCC_LO_REG 106)
+ (VCC_HI_REG 107)
+ (VCCZ_REG 108)
+ (TBA_REG 109)
+ (TBA_LO_REG 109)
+ (TBA_HI_REG 110)
+ (TMA_REG 111)
+ (TMA_LO_REG 111)
+ (TMA_HI_REG 112)
+ (TTMP0_REG 113)
+ (TTMP11_REG 124)
+ (M0_REG 125)
+ (EXEC_REG 126)
+ (EXEC_LO_REG 126)
+ (EXEC_HI_REG 127)
+ (EXECZ_REG 128)
+ (SCC_REG 129)
+; Gap of unused hard registers (130-159).
+ (FIRST_VGPR_REG 160)
+ (LAST_VGPR_REG 415)]
+ )
+
+; Named unspec values
+(define_c_enum "unspecv" [
+ UNSPECV_PROLOGUE_USE])
+
+(define_c_enum "unspec" [
+ UNSPEC_VECTOR])
+
+;; -------------------------------------------------------------------------
+;; Attributes
+;; -------------------------------------------------------------------------
+
+; Every instruction should set MODE and TYPE.  Other attributes should be
+; more or less determined by their conditionals.
+
+; Main data type used by the insn
+(define_attr "mode"
+ "unknown,none,BI,QI,HI,SI,DI,TI,HF,SF,DF,V2SF,V2SI,V3DI,V64SI,V64DI,V64SF,
+ V64DF,V4SF,V2DF,V3SF,V4SI,V3SI,V2DI,V64HF,V64HI,V64QI"
+ (const_string "unknown"))
+
+; Instruction type (encoding) as described in the specification.
+; The following table summarizes possible operands of individual instruction
+; types and corresponding constraints.
+;
+; sop2 - scalar, two inputs, one output
+; ssrc0/ssrc1: sgpr 0-102; flat_scratch,xnack,vcc,tba,tma,ttmp0-11,exec
+;              vccz,execz,scc,inline immediate,fp inline immediate
+; sdst: sgpr 0-102; flat_scratch,xnack,vcc,tba,tma,ttmp0-11,exec
+;
+; Constraints "=SD, SD", "SSA,SSB","SSB,SSA"
+;
+; sopk - scalar, inline constant input, one output
+; simm16: 16bit inline constant
+; sdst: same as sop2/ssrc0
+;
+; Constraints "=SD", "J"
+;
+; sop1 - scalar, one input, one output
+;       ssrc0: same as sop2/ssrc0.  FIXME: the manual omits VCCZ
+; sdst: same as sop2/sdst
+;
+; Constraints "=SD", "SSA"
+;
+; sopc - scalar, two inputs, one comparison
+; ssrc0: same as sop2/ssc0.
+;
+; Constraints "SSI,SSA","SSA,SSI"
+;
+; sopp - scalar, one constant input, one special
+; simm16
+;
+; smem - scalar memory
+; sbase: aligned pair of sgprs. Specify {size[15:0], base[47:0]} in
+; dwords
+; sdata: sgpr0-102, flat_scratch, xnack, vcc, tba, tma
+; offset: sgpr or 20bit unsigned byte offset
+;
+; vop2 - vector, two inputs, one output
+; vsrc0: sgpr0-102,flat_scratch,xnack,vcc,tba,ttmp0-11,m0,exec,
+; inline constant -16 to -64, fp inline immediate, vccz, execz,
+; scc, lds, literal constant, vgpr0-255
+; vsrc1: vgpr0-255
+; vdst: vgpr0-255
+; Limitations: At most one SGPR, at most one constant
+; if constant is used, SGPR must be M0
+; Only SRC0 can be LDS_DIRECT
+;
+; constraints: "=v", "vBSS", "v"
+;
+; vop1 - vector, one input, one output
+; vsrc0: same as vop2/src0
+; vdst: vgpr0-255
+;
+; constraints: "=v", "vBSS"
+;
+; vopc - vector, two inputs, one comparison output;
+; vsrc0: same as vop2/src0
+; vsrc1: vgpr0-255
+; vdst:
+;
+; constraints: "vASS", "v"
+;
+; vop3a - vector, three inputs, one output
+; vdst: vgpr0-255, for v_cmp sgpr or vcc
+; abs,clamp
+; vsrc0: sgpr0-102,vcc,tba,ttmp0-11,m0,exec,
+; inline constant -16 to -64, fp inline immediate, vccz, execz,
+; scc, lds_direct
+;          FIXME: is 1/pi really missing?  Really 104 SGPRs?
+;
+; vop3b - vector, three inputs, one vector output, one scalar output
+; vsrc0,vsrc1,vsrc2: same as vop3a vsrc0
+; vdst: vgpr0-255
+; sdst: sgpr0-103/vcc/tba/tma/ttmp0-11
+;
+;
+; mult is for an insn representing multiple instructions; vmult is the same
+; for sequences that include vector instructions
+
+(define_attr "type"
+ "unknown,sop1,sop2,sopk,sopc,sopp,smem,dsmem,vop2,vop1,vopc,
+ vop3a,vop3b,vintr,lds,mubuf,mtbuf,exp,flat,mult,vmult"
+ (const_string "unknown"))
+
+; Set if instruction is executed in scalar or vector unit
+
+(define_attr "unit" "unknown,scalar,vector"
+ (cond [(eq_attr "type" "sop1,sop2,sopk,sopc,sopp,smem,mult")
+ (const_string "scalar")
+ (eq_attr "type" "vop2,vop1,vopc,vop3a,vop3b,vintr,
+ mubuf,mtbuf,flat,vmult")
+ (const_string "vector")]
+ (const_string "unknown")))
+
+; All vector instructions run on 64 lanes predicated by the EXEC register.
+; The EXEC register is assumed to be non-zero when a program starts.
+; Because scalar operations are often also offloaded to the vector unit, we
+; use mode switching to model four states of the EXEC register:
+; - any: Instruction does not care
+; - init: Instruction must be executed with the EXEC value passed to the
+;         function
+; - subinit: Instruction must be executed with a non-zero EXEC which is a
+;         subset of what was passed to the function
+; - full: Instruction expects EXEC to be all ones (for full-sized vector ops)
+
+(define_attr "exec" "any,unknown,init,subinit,user,full"
+ (cond [(eq_attr "unit" "scalar")
+ (const_string "any")
+ (eq_attr "mode" "none,BI,QI,HI,SI,DI,TI,HF,SF,DF,V2SF,V3DI,V4SF,
+ V2DF,V3SF,V4SI,V2DI,V3SI")
+ (const_string "init")
+ (eq_attr "mode" "V64SI,V64DI,V64SF,V64DF,V64HF,V64HI,V64QI")
+ (const_string "full")]
+ (const_string "unknown")))
+
+;; -------------------------------------------------------------------------
+;; Iterators useful across the whole machine description
+;; -------------------------------------------------------------------------
+
+; Scalar registers can generally be operated in SI or DI.
+(define_mode_iterator SIDI_MODE [SI DI])
+; Double reg vector operations
+(define_mode_iterator V64_INT_MODE [DI V64DI])
+; Default pointer is either 32bit or 64bit
+;(define_mode_iterator P [(SI "Pmode == SImode") (DI "Pmode == DImode")])
+;(define_mode_iterator V64P [(V64SI "Pmode == SImode") (V64DI "Pmode == DImode")])
+
+(include "predicates.md")
+(include "constraints.md")
+
+;; -------------------------------------------------------------------------
+;; Attributes.
+;; -------------------------------------------------------------------------
+
+; Translate RTX code into GCN instruction mnemonics
+(define_code_attr mnemonic
+ [(minus "sub%i")
+ (plus "add%i")
+ (ashift "lshl%b")
+ (lshiftrt "lshr%b")
+ (ashiftrt "ashr%i")
+ (and "and%b")
+ (ior "or%b")
+ (xor "xor%b")
+ (mult "mul%i")
+ (smin "min%i")
+ (smax "max%i")
+ (umin "min%u")
+ (umax "max%u")])
+
+(define_code_attr revmnemonic
+ [(minus "subrev%i")
+ (ashift "lshlrev%b")
+ (lshiftrt "lshrrev%b")
+ (ashiftrt "ashrrev%i")])
+
+; Translate RTX code into corresponding expander name.
+(define_code_attr expander
+ [(and "and")
+ (ior "ior")
+ (xor "xor")
+ (plus "add")
+ (minus "sub")
+ (ashift "ashl")
+ (lshiftrt "lshr")
+ (ashiftrt "ashr")
+ (mult "mul")
+ (smin "smin")
+ (smax "smax")
+ (umin "umin")
+ (umax "umax")])
+
+(define_mode_attr native_mode
+ [(V64DI "V64SI") (DI "SI")])
+;; -------------------------------------------------------------------------
+;; nop instruction
+;; -------------------------------------------------------------------------
+
+(define_insn "nop"
+ [(const_int 0)]
+ ""
+ "s_nop\t0x")
+
+;; -------------------------------------------------------------------------
+;; Trap
+;; -------------------------------------------------------------------------
+
+(define_insn "trap"
+ [(trap_if (const_int 1) (const_int 0))]
+ ""
+ "s_trap")
+
+;; -------------------------------------------------------------------------
+;; Moves
+;; -------------------------------------------------------------------------
+
+; All modes in which GCN supports a move operation
+(define_mode_iterator S_MOV_MODE [BI SI DI SF DF V2SI V2SF V64SI V64DI])
+; All modes GCN can move in a single vector or scalar reg
+(define_mode_iterator S_MOV1_MODE [BI SI SF])
+; All modes GCN can move in a pair of vector or scalar regs
+(define_mode_iterator S_MOV2_MODE [DI DF])
+; Loads and stores can do 3, 4, 8, or 16 double words.
+(define_mode_iterator S_MOV3_MODE [V3SI V3SF])
+(define_mode_iterator S_MOV4_MODE [TI V4SI V4SF V2DI V2DF])
+(define_mode_iterator S_MOV34_MODE [V3SI V3SF TI V4SI V4SF V2DI V2DF])
+
+;; All modes we support moves in.
+(define_mode_iterator ES_MOV_MODE [BI QI HI SI DI SF DF V2SI V2SF V64SI V64DI])
+(define_expand "mov<mode>"
+ [(set (match_operand:ES_MOV_MODE 0 "gcn_simple_mem_or_reg_operand")
+ (match_operand:ES_MOV_MODE 1 "gcn_load_operand"))]
+ ""
+{
+ if (gcn_expand_mov(<MODE>mode, operands[0], operands[1]))
+ DONE;
+})
+
+; We need BImode move so we can reload flags registers
+
+;(define_insn "*movbi"
+ ;[(set (match_operand:BI 0 "register_operand" "=SD ,v")
+ ;;(match_operand:BI 1 "nonmemory_operand" "SSA,vSSA"))]
+ ;""
+ ;"@
+ ;s_mov_b32\t%0, %1
+ ;v_mov_b32\t%0, %1"
+ ;[(set_attr "type" "sop1,vop1")
+ ;(set_attr "mode" "SI")])
+
+; 32bit move pattern
+
+; FIXME: Documentation describes s_store_dword, but it is not accepted by
+; the LLVM assembler.
+(define_insn "*mov<mode>_scalar"
+ [(set (match_operand:S_MOV1_MODE 0 "register_operand" "=SD,SD, SD,Sm,v, Sm")
+ (match_operand:S_MOV1_MODE 1 "gcn_load_operand" "SSA,SSJ,B, RS,SS,v"))]
+ ""
+ "@
+ s_mov_b32\t%0, %1
+ s_movk_i32\t%0, %1
+ s_mov_b32\t%0, %1
+ s_load_dword\t%0, %A1\n\ts_waitcnt\tlgkmcnt(0)
+ v_writelane_b32\t%0, %1, 0
+ v_readlane_b32\t%0, %1, 0"
+ [(set_attr "type" "sop1,sopk,sop1,smem,vop3a,vop3a")
+ (set_attr "mode" "SI")])
+
+; FIXME: Why does readfirstlane have both VOP1 and VOP3 encodings?
+(define_insn_and_split "*mov<mode>"
+ [(set (match_operand:S_MOV1_MODE 0 "nonimmediate_operand" "=SD, SD, SD,^Sm,v, v, Sm,^v,^Sm,RD,RF,v ,v ")
+ (match_operand:S_MOV1_MODE 1 "gcn_load_operand" " SSA,SSJ,B, RS,vB,SS,v, SS,v ,v ,v ,RD,RF"))
+ (use (match_operand:DI 2 "gcn_exec_operand" " n, n, n, n, e, e, e, O, O ,e ,e ,e ,e "))]
+ "(register_operand (operands[0], VOIDmode) || register_operand (operands[1], VOIDmode))
+ && (REG_P (operands[2]) || !gcn_vgpr_move_p (operands[0], operands[1]))"
+ "@
+ #
+ #
+ #
+ #
+ v_mov_b32\t%0, %1
+ v_mov_b32\t%0, %1
+ v_readfirstlane_b32\t%0, %1
+ #
+ #
+ ds_write_b32\t%A0, %1%O0
+ flat_store%s0\t%A0, %1
+ ds_read_b32\t%0, %A1%O1
+ flat_load_dword\t%0, %A1\n\ts_waitcnt\tlgkmcnt(0),vmcnt(0)"
+ "gcn_sgpr_move_p (operands[0], operands[1])
+ || (reload_completed && GET_CODE (operands[2]) == CONST_INT)"
+ [(set (match_dup 0) (match_dup 1))]
+{}
+ [(set_attr "type" "sop1,sopk,sop1,smem,vop1,vop1,vop1,vop3a,vop3a,dsmem,flat,dsmem,flat")
+ (set_attr "mode" "SI")])
+
+(define_insn_and_split "*mov<mode>_scalar"
+ [(set (match_operand:S_MOV2_MODE 0 "register_operand" "=SD,SD,Sm,v, Sm")
+ (match_operand:S_MOV2_MODE 1 "general_operand" "SSB,SSn,RS,SS,v"))]
+ ""
+ "@
+ s_mov%b0\t%0, %1
+ #
+ s_load%s0\t%0, %A1\n\ts_waitcnt\tlgkmcnt(0)
+ #
+ #"
+ "(reload_completed && !gcn_sgpr_move_p (operands[0], operands[1]))
+ || (GET_CODE (operands[1]) == CONST_INT && !gcn_constant_p (operands[1]))"
+ [(set (match_dup 0) (match_dup 1))
+ (set (match_dup 2) (match_dup 3))]
+{
+ gcn_split_operands (<MODE>mode, operands, 2, 2);
+}
+ [(set_attr "type" "sop1,mult,smem,vmult,vmult")
+ (set_attr "mode" "DI,SI,DI,SI,SI")])
+
+(define_insn_and_split "*mov<mode>_scalar"
+ [(set (match_operand:S_MOV2_MODE 0 "nonimmediate_operand" "=SD,SD,Sm,v, v ,^v, ^Sm,RD,RF,v ,v ")
+ (match_operand:S_MOV2_MODE 1 "general_operand" "SSB,SSn,RS,vn,SS, SS,v ,v ,v ,RD,RF"))
+ (use (match_operand:DI 2 "gcn_exec_operand" "n,n,n,e,e,O,O,e,e,e,e"))]
+ "(register_operand (operands[0], VOIDmode) || register_operand (operands[1], VOIDmode))
+ && (REG_P (operands[2]) || !gcn_vgpr_move_p (operands[0], operands[1]))"
+ "@
+ #
+ #
+ #
+ #
+ #
+ #
+ #
+ ds_write_b64\t%A0, %1%O0
+ flat_store%s0\t%A0, %1
+ ds_read_b64\t%0, %A1%O1
+ flat_load_dword\t%0, %A1\n\ts_waitcnt\tlgkmcnt(0),vmcnt(0)"
+ "gcn_sgpr_move_p (operands[0], operands[1])
+ || (reload_completed && GET_CODE (operands[2]) == CONST_INT)"
+ [(set (match_dup 0) (match_dup 1))]
+{}
+ [(set_attr "type" "sop1,mult,smem,vmult,vmult,vmult,vmult,dsmem,flat,dsmem,flat")
+ (set_attr "mode" "DI,SI,DI,SI,SI,SI,SI,DI,DI,DI,DI")])
+
+
+; Split to move by pieces but be sure that we do not split s_mov_b64
+(define_split
+ [(set (match_operand:S_MOV2_MODE 0 "register_operand")
+ (match_operand:S_MOV2_MODE 1 "nonmemory_operand"))
+ (use (match_operand:DI 2 "register_operand"))]
+ "gcn_vgpr_register_operand (operands[0], VOIDmode)
+ || gcn_vgpr_register_operand (operands[1], VOIDmode)"
+ [(parallel [(set (match_dup 0) (match_dup 1)) (use (match_dup 4))])
+ (parallel [(set (match_dup 2) (match_dup 3)) (use (match_dup 4))])]
+{
+ operands[4] = operands[2];
+ gcn_split_operands (<MODE>mode, operands, 2, 2);
+})
+
+(define_insn "*mov<mode>"
+ [(set (match_operand:S_MOV34_MODE 0 "register_operand" "=SD,SD, SD,Sm,v, ^v, ^Sm")
+ (match_operand:S_MOV34_MODE 1 "gcn_load_operand" "SSA,SSJ,B, RS,vB, SS, v"))
+ (use (match_operand:DI 2 "gcn_exec_operand" "n,n,n,n,e,e,n"))]
+ ""
+ "@
+ #
+ #
+ #
+ s_load%s0\t%0, %A1\n\ts_waitcnt\tlgkmcnt(0)
+ #
+ #
+ #"
+ [(set_attr "type" "mult,mult,mult,smem,vmult,vmult,vmult")
+ (set_attr "mode" "DI,SI,SI,<MODE>,SI,SI,SI")])
+
+; Watch for partial overlap - register triples are not aligned.
+(define_split
+ [(set (match_operand:S_MOV3_MODE 0 "register_operand")
+ (match_operand:S_MOV3_MODE 1 "nonmemory_operand"))]
+ "(REG_P (operands[0]) && REG_P (operands[1]))
+ && REGNO (operands[0]) > REGNO (operands[1])
+   && REGNO (operands[0]) < REGNO (operands[1]) + 3"
+ [(set (match_dup 0) (match_dup 1))
+ (set (match_dup 2) (match_dup 3))
+ (set (match_dup 4) (match_dup 5))]
+{
+ gcn_split_operands (<MODE>mode, operands, 3, 2);
+})
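+
+; For example (register numbers hypothetical), copying s[1:3] to s[2:4]
+; overlaps in s[2:3], so the triple cannot simply be moved as a whole and
+; is decomposed into word-sized pieces instead.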
+
+
+(define_split
+ [(set (match_operand:S_MOV3_MODE 0 "register_operand")
+ (match_operand:S_MOV3_MODE 1 "nonmemory_operand"))]
+ "reload_completed"
+ [(set (match_dup 0) (match_dup 1))
+ (set (match_dup 2) (match_dup 3))]
+{
+ operands[2] = gcn_operand_part (GET_MODE (operands[0]), operands[0], 2);
+ operands[3] = gcn_operand_part (GET_MODE (operands[0]), operands[1], 2);
+ operands[0] = gcn_operand_doublepart
+ (GET_MODE (operands[0]), operands[0], 0);
+ operands[1] = gcn_operand_doublepart
+ (GET_MODE (operands[0]), operands[1], 0);
+})
+
+(define_split
+ [(set (match_operand:S_MOV4_MODE 0 "register_operand")
+ (match_operand:S_MOV4_MODE 1 "nonmemory_operand"))]
+ "reload_completed"
+ [(set (match_dup 0) (match_dup 1))
+ (set (match_dup 2) (match_dup 3))]
+{
+ operands[2] = gcn_operand_doublepart (GET_MODE (operands[0]), operands[0], 1);
+ operands[3] = gcn_operand_doublepart (GET_MODE (operands[0]), operands[1], 1);
+ operands[0] = gcn_operand_doublepart (GET_MODE (operands[0]), operands[0], 0);
+ operands[1] = gcn_operand_doublepart (GET_MODE (operands[0]), operands[1], 0);
+})
+
+;; -------------------------------------------------------------------------
+;; Prologue/Epilogue
+;; -------------------------------------------------------------------------
+
+(define_insn "prologue_use"
+ [(unspec_volatile [(match_operand 0)] UNSPECV_PROLOGUE_USE)]
+ ""
+ "")
+
+;; Flat scratch initialization.
+(define_expand "prologue"
+ [(const_int 0)]
+ ""
+{
+ gcn_expand_prologue ();
+})
+
+(define_expand "epilogue"
+ [(simple_return)])
+
+;; -------------------------------------------------------------------------
+;; Control flow
+;; -------------------------------------------------------------------------
+
+(define_insn "jump"
+ [(set (pc)
+ (label_ref (match_operand 0)))]
+ ""
+ "s_branch\t%0"
+ [(set_attr "type" "sopp")])
+
+(define_insn "cjump"
+ [(set (pc)
+ (if_then_else (match_operator:BI 1 "gcn_conditional_operator"
+ [(match_operand:BI
+ 2 "gcn_conditional_register_operand" "ca")
+ (const_int 0)])
+ (label_ref (match_operand 0))
+ (pc)))]
+ ""
+ "s_cbranch%C1\t%0"
+ [(set_attr "type" "sopp")])
+
+(define_insn "return"
+ [(simple_return)]
+ ""
+ "s_endpgm"
+ [(set_attr "type" "sopp")])
+
+;; -------------------------------------------------------------------------
+;; Conditionals
+;; -------------------------------------------------------------------------
+
+(define_insn "cstoresi4"
+ [(set (match_operand:BI 0 "gcn_conditional_register_operand" "=cs,cs,cs,cs")
+ (match_operator:BI 1 "gcn_compare_operator"
+ [(match_operand:SI 2 "gcn_alu_operand" "SSA,SSA,SSB,SS")
+ (match_operand:SI 3 "gcn_alu_operand" "SSA,SSK,SS ,SSB")]))]
+ ""
+ "@
+ s_cmp%D1\t%2, %3
+ s_cmpk%D1\t%2, %3
+ s_cmp%D1\t%2, %3
+ s_cmp%D1\t%2, %3"
+  [(set_attr "type" "sopc,sopk,sopc,sopc")
+ (set_attr "mode" "SI")])
+
+(define_expand "cbranchsi4"
+ [(match_operator 0 "gcn_compare_operator"
+ [(match_operand:SI 1 "gcn_alu_operand")
+ (match_operand:SI 2 "gcn_alu_operand")])
+ (match_operand 3)]
+ ""
+{
+ rtx cc = gen_reg_rtx (BImode);
+ emit_insn (gen_cstoresi4 (cc, operands[0], operands[1], operands[2]));
+ emit_jump_insn (gen_cjump (operands[3], gen_rtx_NE (BImode, cc, const0_rtx),
+ cc));
+ DONE;
+})
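+
+; For example, "if (a != b) goto L" expands to a scalar compare into SCC
+; followed by a conditional branch; roughly (an illustrative sketch, the
+; exact mnemonic depends on the comparison):
+;
+;	s_cmp_lg_i32	s0, s1
+;	s_cbranch_scc1	.L2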
+
+; FIXME: s_cmp_eq_64 is not accepted by llvm-as.
+
+(define_insn "cstoredi4_vec_and_scalar"
+ [(set (match_operand:BI 0 "gcn_conditional_register_operand" "=cs,cv,cv")
+ (match_operator:BI 1 "gcn_compare_64bit_operator"
+ [(match_operand:DI 2 "gcn_alu_operand" "%SSA,vSS,v")
+ (match_operand:DI 3 "gcn_alu_operand" " SSB,v, vB")]))
+ (use (match_operand:DI 4 "gcn_exec_operand" "n,e,e"))]
+ "0"
+ "@
+ #
+ v_cmp%E1\tvcc, %3, %2
+ v_cmp%E1\tvcc, %3, %2"
+ [(set_attr "type" "unknown,vopc,vopc")
+ (set_attr "mode" "DI")])
+
+(define_split
+ [(set (match_operand:BI 0 "gcn_conditional_register_operand" "")
+ (match_operator:BI 1 "gcn_compare_64bit_operator"
+ [(match_operand:DI 2 "gcn_alu_operand" "")
+ (match_operand:DI 3 "gcn_alu_operand" "")]))
+ (use (match_operand:DI 4 "" ""))]
+ "REG_P (operands[0]) && REGNO (operands[0]) == SCC_REG && 0"
+ [(set (match_dup 0)
+ (match_op_dup 1 [(match_dup 2) (match_dup 3)]))])
+
+(define_insn "cstoredi4_scalar"
+ [(set (match_operand:BI 0 "gcn_conditional_register_operand" "=cs")
+ (match_operator 1 "gcn_compare_64bit_operator"
+ [(match_operand:DI 2 "gcn_alu_operand" "%SSA")
+ (match_operand:DI 3 "gcn_alu_operand" "SSB")]))]
+ "0"
+ "s_cmp%D1\t%2, %3"
+ [(set_attr "type" "vopc")
+ (set_attr "mode" "DI")])
+
+(define_insn "cstoredi4"
+ [(set (match_operand:BI 0 "gcn_conditional_register_operand" "=cv")
+ (match_operator:BI 1 "gcn_compare_operator"
+ [(match_operand:DI 2 "gcn_alu_operand" "vSSA")
+ (match_operand:DI 3 "gcn_alu_operand" "v")]))
+ (use (match_operand:DI 4 "gcn_exec_operand" "e"))]
+ ""
+ "v_cmp%E1\tvcc, %2, %3"
+ [(set_attr "type" "vopc")
+ (set_attr "mode" "DI")])
+
+(define_expand "cbranchdi4"
+ [(match_operator 0 "gcn_compare_operator"
+ [(match_operand:DI 1 "gcn_alu_operand")
+ (match_operand:DI 2 "gcn_alu_operand")])
+ (match_operand 3)]
+ ""
+{
+ rtx cc = gen_reg_rtx (BImode);
+ emit_insn (gen_cstoredi4 (cc, operands[0], operands[1], operands[2],
+ gcn_scalar_exec ()));
+ emit_jump_insn (gen_cjump (operands[3], gen_rtx_NE (BImode, cc, const0_rtx),
+ cc));
+ DONE;
+})
+
+;; -------------------------------------------------------------------------
+;; ALU special cases: Plus
+;; -------------------------------------------------------------------------
+
+(define_code_iterator plus_minus [plus minus])
+
+(define_expand "<expander>si3"
+ [(parallel [(set (match_operand:SI 0 "register_operand")
+ (plus_minus:SI
+ (match_operand:SI 1 "gcn_alu_operand")
+ (match_operand:SI 2 "gcn_alu_operand")))
+ (use (match_dup 3))
+ (clobber (reg:BI SCC_REG))
+ (clobber (reg:CC VCC_REG))])]
+ ""
+{
+ operands[3] = gcn_scalar_exec ();
+})
+
+(define_insn "*addsi3_vec_and_scalar"
+ [(set (match_operand:SI 0 "register_operand" "=SD, SD, SD, v")
+ (plus:SI
+ (match_operand:SI 1 "gcn_alu_operand" "%SSA,0 ,SSA,v")
+ (match_operand:SI 2 "gcn_alu_operand" " SSA,SSJ,B, vBSS")))
+   (use (match_operand:DI 3 "gcn_exec_operand" "n,n,n,e"))
+ (clobber (reg:BI SCC_REG))
+ (clobber (reg:CC VCC_REG))]
+ ""
+ "#")
+
+(define_predicate "plus_minus_operator"
+ (match_code "plus,minus"))
+
+(define_split
+ [(set (match_operand:SIDI_MODE 0 "register_operand" "")
+ (match_operator:SIDI_MODE 3 "plus_minus_operator"
+ [(match_operand:SIDI_MODE 1 "gcn_alu_operand" "")
+ (match_operand:SIDI_MODE 2 "gcn_alu_operand" "")]))
+ (use (match_operand:DI 4 "" ""))
+ (clobber (reg:BI SCC_REG))
+ (clobber (reg:CC VCC_REG))]
+ "gcn_sgpr_register_operand (operands[0], VOIDmode)"
+ [(parallel [(set (match_dup 0) (match_op_dup 3 [(match_dup 1) (match_dup 2)]))
+ (clobber (reg:BI SCC_REG))])])
+
+(define_split
+ [(set (match_operand:SIDI_MODE 0 "register_operand" "")
+ (match_operator:SIDI_MODE 3 "plus_minus_operator"
+ [(match_operand:SIDI_MODE 1 "gcn_alu_operand" "")
+ (match_operand:SIDI_MODE 2 "gcn_alu_operand" "")]))
+ (use (match_operand:DI 4 "" ""))
+ (clobber (reg:BI SCC_REG))
+ (clobber (reg:CC VCC_REG))]
+ "gcn_vgpr_register_operand (operands[0], VOIDmode)"
+ [(parallel [(set (match_dup 0) (match_op_dup 3 [(match_dup 1) (match_dup 2)]))
+ (use (match_dup 4))
+ (clobber (reg:CC VCC_REG))])])
+
+(define_insn "*addsi3_scalar"
+ [(set (match_operand:SI 0 "register_operand" "=SD, SD, SD")
+ (plus:SI
+ (match_operand:SI 1 "gcn_alu_operand" "%SSA,0 ,SSA")
+ (match_operand:SI 2 "gcn_alu_operand" " SSA,SSJ,B")))
+ (clobber (reg:BI SCC_REG))]
+ ""
+ "@
+ s_add_i32\t%0, %1, %2
+ s_addk_i32\t%0, %2
+ s_add_i32\t%0, %1, %2"
+ [(set_attr "type" "sop2,sopk,sop2")
+ (set_attr "mode" "SI")])
+
+
+; FIXME: Implemented for now only on the scalar side (using the SCC carry
+; flag).  Vectors are analogous, but we need to expand into vector patterns.
+(define_expand "adddi3"
+ [(match_operand:DI 0 "register_operand")
+ (match_operand:DI 1 "gcn_alu_operand")
+ (match_operand:DI 2 "gcn_alu_operand")]
+ ""
+{
+ emit_insn (gen_addsi3_scalar_carry (
+ gcn_operand_part (DImode, operands[0], 0),
+ gcn_operand_part (DImode, operands[1], 0),
+ gcn_operand_part (DImode, operands[2], 0)));
+ rtx val = gcn_operand_part (DImode, operands[2], 1);
+ if (val != const0_rtx)
+ emit_insn (gen_addcsi3_scalar (
+ gcn_operand_part (DImode, operands[0], 1),
+ gcn_operand_part (DImode, operands[1], 1),
+ gcn_operand_part (DImode, operands[2], 1)));
+ else
+ emit_insn (gen_addcsi3_scalar_zero (
+ gcn_operand_part (DImode, operands[0], 1),
+ gcn_operand_part (DImode, operands[1], 1)));
+ DONE;
+})
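+
+; E.g. a 64-bit add of s[0:1] and s[2:3] into s[4:5] becomes (a sketch;
+; register numbers hypothetical):
+;
+;	s_add_u32	s4, s0, s2	; low word, carry-out sets SCC
+;	s_addc_u32	s5, s1, s3	; high word, consumes SCC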
+
+(define_insn "addsi3_scalar_carry"
+ [(set (match_operand:SI 0 "register_operand" "=SD")
+ (plus:SI (match_operand:SI 1 "gcn_alu_operand" "%SSA")
+ (match_operand:SI 2 "gcn_alu_operand" "SSB")))
+ (set (reg:BI SCC_REG)
+ (ltu:BI (plus:SI (match_dup 1)
+ (match_dup 2))
+ (match_dup 1)))]
+ ""
+ "s_add_u32\t%0, %1, %2"
+ [(set_attr "type" "sop2")
+ (set_attr "mode" "SI")])
+
+(define_insn "addsi3_scalar_carry_cst"
+ [(set (match_operand:SI 0 "register_operand" "=SD")
+ (plus:SI (match_operand:SI 1 "gcn_alu_operand" "SSA")
+ (match_operand:SI 2 "const_int_operand" "n")))
+ (set (reg:BI SCC_REG)
+ (geu:BI (plus:SI (match_dup 1)
+ (match_dup 2))
+ (match_operand:SI 3 "const_int_operand" "n")))]
+ "INTVAL (operands[2]) == -INTVAL (operands[3])"
+ "s_add_u32\t%0, %1, %2"
+ [(set_attr "type" "sop2")
+ (set_attr "mode" "SI")])
+
+(define_insn "addcsi3_scalar"
+ [(set (match_operand:SI 0 "register_operand" "=SD")
+ (plus:SI (plus:SI (zero_extend:SI (reg:BI SCC_REG))
+ (match_operand:SI 1 "gcn_alu_operand" "%SSA"))
+ (match_operand:SI 2 "gcn_alu_operand" "SSB")))
+ (set (reg:BI SCC_REG)
+ (ior:BI (ltu:BI (plus:SI (plus:SI (zero_extend:SI (reg:BI SCC_REG))
+ (match_dup 1))
+ (match_dup 2))
+ (match_dup 2))
+ (ltu:BI (plus:SI (zero_extend:SI (reg:BI SCC_REG)) (match_dup 1))
+ (match_dup 1))))]
+ ""
+ "s_addc_u32\t%0, %1, %2"
+ [(set_attr "type" "sop2")
+ (set_attr "mode" "SI")])
+
+(define_insn "addcsi3_scalar_zero"
+ [(set (match_operand:SI 0 "register_operand" "=SD")
+ (plus:SI (zero_extend:SI (reg:BI SCC_REG))
+ (match_operand:SI 1 "gcn_alu_operand" "SSA")))
+ (set (reg:BI SCC_REG)
+ (ltu:BI (plus:SI (zero_extend:SI (reg:BI SCC_REG)) (match_dup 1))
+ (match_dup 1)))]
+ ""
+ "s_addc_u32\t%0, %1, 0"
+ [(set_attr "type" "sop2")
+ (set_attr "mode" "SI")])
+
+;; -------------------------------------------------------------------------
+;; ALU special cases: Minus
+;; -------------------------------------------------------------------------
+
+(define_expand "subdi3"
+ [(match_operand:DI 0 "register_operand")
+ (match_operand:DI 1 "gcn_alu_operand")
+ (match_operand:DI 2 "gcn_alu_operand")]
+ ""
+{
+ emit_insn (gen_subsi3_scalar_carry (
+ gcn_operand_part (DImode, operands[0], 0),
+ gcn_operand_part (DImode, operands[1], 0),
+ gcn_operand_part (DImode, operands[2], 0)));
+ rtx val = gcn_operand_part (DImode, operands[2], 1);
+ if (val != const0_rtx)
+ emit_insn (gen_subcsi3_scalar (
+ gcn_operand_part (DImode, operands[0], 1),
+ gcn_operand_part (DImode, operands[1], 1),
+ gcn_operand_part (DImode, operands[2], 1)));
+ else
+ emit_insn (gen_subcsi3_scalar_zero (
+ gcn_operand_part (DImode, operands[0], 1),
+ gcn_operand_part (DImode, operands[1], 1)));
+ DONE;
+})
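+
+; E.g. a 64-bit subtract of s[2:3] from s[0:1] into s[4:5] becomes (a
+; sketch; register numbers hypothetical):
+;
+;	s_sub_u32	s4, s0, s2	; low word, borrow-out sets SCC
+;	s_subb_u32	s5, s1, s3	; high word, consumes SCC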
+
+(define_insn "subsi3_scalar_carry"
+ [(set (match_operand:SI 0 "register_operand" "=SD, SD")
+ (minus:SI (match_operand:SI 1 "gcn_alu_operand" "SSA,SSB")
+ (match_operand:SI 2 "gcn_alu_operand" "SSB,SSA")))
+ (set (reg:BI SCC_REG)
+ (gtu:BI (minus:SI (match_dup 1)
+ (match_dup 2))
+ (match_dup 1)))]
+ ""
+ "s_sub_u32\t%0, %1, %2"
+ [(set_attr "type" "sop2")
+ (set_attr "mode" "SI")])
+
+(define_insn "subsi3_scalar_carry_cst"
+ [(set (match_operand:SI 0 "register_operand" "=SD")
+ (minus:SI (match_operand:SI 1 "gcn_alu_operand" "SSA")
+ (match_operand:SI 2 "const_int_operand" "n")))
+ (set (reg:BI SCC_REG)
+ (leu:BI (minus:SI (match_dup 1)
+ (match_dup 2))
+ (match_operand:SI 3 "const_int_operand" "n")))]
+ "INTVAL (operands[2]) == -INTVAL (operands[3])"
+ "s_sub_u32\t%0, %1, %2"
+ [(set_attr "type" "sop2")
+ (set_attr "mode" "SI")])
+
+(define_insn "subcsi3_scalar"
+ [(set (match_operand:SI 0 "register_operand" "=SD, SD")
+ (minus:SI (minus:SI (zero_extend:SI (reg:BI SCC_REG))
+ (match_operand:SI 1 "gcn_alu_operand" "SSA,SSB"))
+ (match_operand:SI 2 "gcn_alu_operand" "SSB,SSA")))
+ (set (reg:BI SCC_REG)
+ (ior:BI (gtu:BI (minus:SI (minus:SI (zero_extend:SI (reg:BI SCC_REG))
+ (match_dup 1))
+ (match_dup 2))
+ (match_dup 1))
+ (gtu:BI (minus:SI (zero_extend:SI (reg:BI SCC_REG)) (match_dup 1))
+ (match_dup 1))))]
+ ""
+ "s_subb_u32\t%0, %1, %2"
+ [(set_attr "type" "sop2")
+ (set_attr "mode" "SI")])
+
+(define_insn "subcsi3_scalar_zero"
+ [(set (match_operand:SI 0 "register_operand" "=SD")
+ (minus:SI (zero_extend:SI (reg:BI SCC_REG))
+ (match_operand:SI 1 "gcn_alu_operand" "SSA")))
+ (set (reg:BI SCC_REG)
+ (gtu:BI (minus:SI (zero_extend:SI (reg:BI SCC_REG)) (match_dup 1))
+ (match_dup 1)))]
+ ""
+ "s_subb_u32\t%0, %1, 0"
+ [(set_attr "type" "sop2")
+ (set_attr "mode" "SI")])
+
+;; -------------------------------------------------------------------------
+;; ALU: mult
+;; -------------------------------------------------------------------------
+
+(define_expand "mulsi3"
+ [(set (match_operand:SI 0 "register_operand")
+ (mult:SI
+ (match_operand:SI 1 "gcn_alu_operand")
+ (match_operand:SI 2 "gcn_alu_operand")))
+ (use (match_dup 3))]
+""
+{
+ operands[3] = gcn_scalar_exec ();
+})
+
+; Vector multiply has a VOP3A encoding, but no corresponding VOP2 form, so
+; there is no long immediate.
+(define_insn_and_split "*mulsi3_vec_and_scalar"
+ [(set (match_operand:SI 0 "register_operand" "=SD, SD,SD, v")
+ (mult:SI
+ (match_operand:SI 1 "gcn_alu_operand" "%SSA,0 ,SSA,v")
+ (match_operand:SI 2 "gcn_alu_operand" " SSA,J, B, vASS")))
+ (use (match_operand:DI 3 "gcn_exec_operand" "n,n,n,e"))]
+ ""
+ "@
+ #
+ #
+ #
+ v_mul_lo_i32\t%0, %1, %2"
+ "gcn_sgpr_register_operand (operands[0], VOIDmode)"
+ [(set (match_operand:SI 0 "register_operand" "=SD, SD,SD, v")
+ (mult:SI
+ (match_operand:SI 1 "gcn_alu_operand" "%SSA,0 ,SSA,vSS")
+ (match_operand:SI 2 "gcn_alu_operand" " SSA,J, B, vA")))]
+{}
+ [(set_attr "type" "sop2,sopk,sop2,vop3a")
+ (set_attr "mode" "SI")])
+
+(define_insn "*mulsi3_scalar"
+ [(set (match_operand:SI 0 "register_operand" "=SD, SD,SD")
+ (mult:SI
+ (match_operand:SI 1 "gcn_alu_operand" "%SSA,0 ,SSA")
+ (match_operand:SI 2 "gcn_alu_operand" " SSA,J, B")))]
+ ""
+ "@
+ s_mul_i32\t%0, %1, %2
+ s_mulk_i32\t%0, %2
+ s_mul_i32\t%0, %1, %2"
+ [(set_attr "type" "sop2,sopk,sop2")
+ (set_attr "mode" "SI")])
+
+;; -------------------------------------------------------------------------
+;; ALU: the generic 32-bit case
+;; -------------------------------------------------------------------------
+
+(define_code_iterator vec_and_scalar [and ior xor ashift lshiftrt
+ ashiftrt smin smax umin umax])
+
+(define_expand "<expander>si3"
+ [(parallel [(set (match_operand:SI 0 "register_operand")
+ (vec_and_scalar:SI
+ (match_operand:SI 1 "gcn_alu_operand")
+ (match_operand:SI 2 "gcn_alu_operand")))
+ (use (match_dup 3))
+ (clobber (reg:BI SCC_REG))])]
+ ""
+{
+ operands[3] = gcn_scalar_exec ();
+})
+
+;; No plus and mult here - they have variants with a 16-bit immediate and
+;; are therefore defined separately above.
+(define_code_iterator vec_and_scalar_com [and ior xor smin smax umin umax])
+
+(define_insn "*<expander>si3"
+ [(set (match_operand:SI 0 "register_operand" "=SD,v")
+ (vec_and_scalar_com:SI (match_operand:SI 1 "gcn_alu_operand" "%SSA,v")
+ (match_operand:SI 2 "gcn_alu_operand" "SSB,vSSB")))
+ (use (match_operand:DI 3 "gcn_exec_operand" "n,e"))
+ (clobber (reg:BI SCC_REG))]
+ ""
+ "#"
+ [(set_attr "type" "sop2,vop2")
+ (set_attr "mode" "SI")])
+
+(define_insn "*<expander>si3_scalar"
+ [(set (match_operand:SI 0 "register_operand" "=SD,v")
+ (vec_and_scalar_com:SI (match_operand:SI 1 "register_operand" "%SSA,v")
+ (match_operand:SI 2 "gcn_alu_operand" "SSB,vSSB")))
+ (clobber (reg:BI SCC_REG))]
+ ""
+ "s_<mnemonic>0\t%0, %1, %2"
+ [(set_attr "type" "sop2")
+ (set_attr "mode" "SI")])
+
+(define_code_iterator vec_and_scalar_nocom [ashift lshiftrt ashiftrt])
+
+(define_insn "*<expander>si3_vec_and_scalar"
+ [(set (match_operand:SI 0 "register_operand" "=SD,SD,v")
+ (vec_and_scalar_nocom:SI (match_operand:SI 1 "gcn_alu_operand" "SSB,SSA,v")
+ (match_operand:SI 2 "gcn_alu_operand" "SSA,SSB,vSSB")))
+ (use (match_operand:DI 3 "gcn_exec_operand" "n,n,e"))
+ (clobber (reg:BI SCC_REG))]
+ ""
+ "#"
+ [(set_attr "type" "sop2,sop2,vop2")
+ (set_attr "mode" "SI")])
+
+(define_insn "<expander>si3_scalar"
+ [(set (match_operand:SI 0 "register_operand" "=SD,SD")
+ (vec_and_scalar_nocom:SI (match_operand:SI 1 "gcn_alu_operand" "SSB,SSA")
+ (match_operand:SI 2 "gcn_alu_operand" "SSA,SSB")))
+ (clobber (reg:BI SCC_REG))]
+ ""
+ "@
+ s_<mnemonic>0\t%0, %1, %2
+ s_<mnemonic>0\t%0, %1, %2"
+ [(set_attr "type" "sop2,sop2")
+ (set_attr "mode" "SI")])
+
+;; -------------------------------------------------------------------------
+;; ALU: the generic 64-bit case
+;; -------------------------------------------------------------------------
+
+(define_code_iterator vec_and_scalar64_com [and ior xor])
+
+(define_expand "<expander>di3"
+ [(set (match_operand:DI 0 "register_operand")
+ (vec_and_scalar64_com:DI (match_operand:DI 1 "gcn_alu_operand")
+ (match_operand:DI 2 "gcn_alu_operand")))
+ (use (match_dup 3))
+ (clobber (reg:BI SCC_REG))]
+ ""
+{
+ operands[3] = gcn_scalar_exec ();
+})
+
+(define_insn "*<expander>di3_vec_and_scalar"
+ [(set (match_operand:DI 0 "register_operand" "=SD,v")
+ (vec_and_scalar64_com:DI (match_operand:DI 1 "register_operand" "%SSA,v")
+ (match_operand:DI 2 "gcn_alu_operand" "SSB,vSSB")))
+ (use (match_operand:DI 3 "gcn_exec_operand" "n,e"))
+ (clobber (reg:BI SCC_REG))]
+ ""
+ "#"
+ [(set_attr "type" "sop2,vop2")
+ (set_attr "mode" "DI")])
+
+(define_insn "*<expander>di3_scalar"
+ [(set (match_operand:DI 0 "register_operand" "=SD")
+ (vec_and_scalar64_com:DI (match_operand:DI 1 "register_operand" "%SSA")
+ (match_operand:DI 2 "gcn_alu_operand" "SSB")))
+ (clobber (reg:BI SCC_REG))]
+ ""
+ "s_<mnemonic>0\t%0, %1, %2"
+ [(set_attr "type" "sop2")
+ (set_attr "mode" "DI")])
+
+(define_expand "<expander>di3"
+ [(set (match_operand:DI 0 "register_operand")
+ (vec_and_scalar_nocom:DI (match_operand:DI 1 "gcn_alu_operand")
+ (match_operand:DI 2 "gcn_alu_operand")))
+ (use (match_dup 3))
+ (clobber (reg:BI SCC_REG))]
+ ""
+{
+ operands[3] = gcn_scalar_exec ();
+})
+
+(define_insn "*<expander>di3_vec_and_scalar"
+ [(set (match_operand:DI 0 "register_operand" "=SD,SD,v")
+ (vec_and_scalar_nocom:DI
+ (match_operand:DI 1 "gcn_alu_operand" "SSB,SSA,v")
+ (match_operand:SI 2 "gcn_alu_operand" "SSA,SSB,vSSB")))
+ (clobber (reg:BI SCC_REG))]
+ ""
+ "s_<mnemonic>0\t%0, %1, %2"
+ [(set_attr "type" "sop2,sop2,vop2")
+ (set_attr "mode" "DI")])
+
+(define_insn "*<expander>di3_scalar"
+ [(set (match_operand:DI 0 "register_operand" "=SD,SD")
+ (vec_and_scalar_nocom:DI
+ (match_operand:DI 1 "gcn_alu_operand" "SSB,SSA")
+ (match_operand:SI 2 "gcn_alu_operand" "SSA,SSB")))
+ (clobber (reg:BI SCC_REG))]
+ ""
+ "s_<mnemonic>0\t%0, %1, %2"
+ [(set_attr "type" "sop2,sop2")
+ (set_attr "mode" "DI")])
+
+;; -------------------------------------------------------------------------
+;; Generic splitters that choose the proper insn variant once we have
+;; decided between the vector and the scalar ALU
+;; -------------------------------------------------------------------------
+
+(define_split
+ [(set (match_operand 0 "gcn_sgpr_register_operand")
+ (match_operator 4 "binary_operator"
+ [(match_operand 1 "gcn_alu_operand")
+ (match_operand 2 "gcn_alu_operand")]))
+ (use (match_operand:DI 3 ""))
+ (clobber (reg:BI SCC_REG))]
+ ""
+ [(parallel [(set (match_dup 0)
+ (match_op_dup 4 [(match_dup 1) (match_dup 2)]))
+ (clobber (reg:BI SCC_REG))])])
+
+(define_split
+ [(set (match_operand 0 "gcn_vgpr_register_operand")
+ (match_operator 4 "binary_operator"
+ [(match_operand 1 "gcn_alu_operand")
+ (match_operand 2 "gcn_alu_operand")]))
+ (use (match_operand:DI 3 ""))
+ (clobber (reg:BI SCC_REG))]
+ ""
+ [(parallel [(set (match_dup 0)
+ (match_op_dup 4 [(match_dup 1) (match_dup 2)]))
+ (use (match_dup 3))])])
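+
+; E.g. an AND whose destination ends up in an SGPR drops the EXEC use and
+; becomes a plain s_and_b32, while a VGPR destination keeps the EXEC use
+; for the vector ALU form (an illustrative sketch).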
+
+
+(include "gcn-valu.md")
diff --git a/gcc/config/gcn/gcn.opt b/gcc/config/gcn/gcn.opt
new file mode 100644
index 00000000000..ffb5547adbd
--- /dev/null
+++ b/gcc/config/gcn/gcn.opt
@@ -0,0 +1,40 @@
+; Options for the GCN port of the compiler.
+
+; Copyright (C) 2016-2017 Free Software Foundation, Inc.
+;
+; This file is part of GCC.
+;
+; GCC is free software; you can redistribute it and/or modify it under
+; the terms of the GNU General Public License as published by the Free
+; Software Foundation; either version 3, or (at your option) any later
+; version.
+;
+; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+; WARRANTY; without even the implied warranty of MERCHANTABILITY or
+; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+; for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with GCC; see the file COPYING3. If not see
+; <http://www.gnu.org/licenses/>.
+
+HeaderInclude
+config/gcn/gcn-opts.h
+
+Enum
+Name(gpu_type) Type(enum processor_type)
+GCN GPU type to use:
+
+EnumValue
+Enum(gpu_type) String(carrizo) Value(PROCESSOR_CARRIZO)
+
+EnumValue
+Enum(gpu_type) String(fiji) Value(PROCESSOR_FIJI)
+
+march=
+Target RejectNegative Joined ToLower Enum(gpu_type) Var(gcn_arch) Init(PROCESSOR_CARRIZO)
+Specify the name of the target GPU.
+
+mtune=
+Target RejectNegative Joined ToLower Enum(gpu_type) Var(gcn_tune) Init(PROCESSOR_CARRIZO)
+Specify the name of the GPU to tune the generated code for.
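+
+; Usage sketch: both options accept the same GPU names, e.g.
+;   gcc -march=fiji -mtune=fiji ...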
diff --git a/gcc/config/gcn/predicates.md b/gcc/config/gcn/predicates.md
new file mode 100644
index 00000000000..137c39c69be
--- /dev/null
+++ b/gcc/config/gcn/predicates.md
@@ -0,0 +1,167 @@
+;; Predicate definitions for GCN.
+;; Copyright (C) 2016-2017 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; Return true if the operand can be stored in a sign-extended 16-bit
+;; immediate field.
+(define_predicate "gcn_16bit_immediate_operand"
+ (and (match_code "const_int")
+ (match_test "satisfies_constraint_I (op)")))
+
+(define_predicate "gcn_conditional_register_operand"
+ (match_operand 0 "register_operand")
+{
+ if (GET_CODE (op) == SUBREG)
+ op = SUBREG_REG (op);
+
+ if (!REG_P (op))
+ return 0;
+
+ return REGNO (op) == VCCZ_REG
+ || REGNO (op) == SCC_REG
+ || REGNO (op) == EXECZ_REG
+ || REGNO (op) >= FIRST_PSEUDO_REGISTER;
+})
+
+(define_predicate "gcn_sgpr_register_operand"
+ (match_operand 0 "register_operand")
+{
+ if (GET_CODE (op) == SUBREG)
+ op = SUBREG_REG (op);
+
+ if (!REG_P (op))
+ return 0;
+
+ return REGNO (op) < FIRST_PSEUDO_REGISTER && !VGPR_REGNO_P (REGNO (op));
+})
+
+(define_predicate "gcn_simple_mem_or_reg_operand"
+ (match_operand 0 "nonimmediate_operand")
+{
+ if (GET_CODE (op) == MEM
+ && GET_CODE (XEXP (op, 0)) != REG)
+ return false;
+ return true;
+})
+
+(define_predicate "gcn_vgpr_register_operand"
+ (match_operand 0 "register_operand")
+{
+ if (GET_CODE (op) == SUBREG)
+ op = SUBREG_REG (op);
+
+ if (!REG_P (op))
+ return 0;
+
+ return VGPR_REGNO_P (REGNO (op));
+})
+
+(define_predicate "gcn_inline_immediate_operand"
+ (match_code "const_int,const_double,const_vector")
+{
+ return gcn_inline_constant_p (op);
+})
+
+(define_predicate "gcn_vec0_operand"
+ (match_code "const_vector")
+{
+ return CONST_VECTOR_ELT (op, 0) == const0_rtx && gcn_inline_constant_p (op);
+})
+
+(define_predicate "gcn_vec1_operand"
+ (match_code "const_vector")
+{
+ return CONST_VECTOR_ELT (op, 0) == const1_rtx && gcn_inline_constant_p (op);
+})
+
+(define_predicate "gcn_32bit_immediate_operand"
+ (match_code "const_int,const_double,const_vector")
+{
+ return gcn_constant_p (op);
+})
+
+; LRA works more smoothly when exec values are immediate constants
+; prior to register allocation.
+(define_predicate "gcn_exec_operand"
+ (ior (match_operand 0 "register_operand")
+ (match_code "const_int")))
+
+(define_predicate "gcn_exec_reg_operand"
+ (match_operand 0 "register_operand"))
+
+(define_predicate "gcn_load_operand"
+ (ior (match_operand 0 "nonimmediate_operand")
+ (match_operand 0 "gcn_32bit_immediate_operand")))
+
+(define_predicate "gcn_alu_operand"
+ (ior (match_operand 0 "register_operand")
+ (match_operand 0 "gcn_32bit_immediate_operand")))
+
+(define_predicate "gcn_ds_memory_operand"
+ (and (match_code "mem")
+ (and (match_test "MEM_ADDR_SPACE (op) == ADDR_SPACE_LDS || MEM_ADDR_SPACE (op) == ADDR_SPACE_GDS")
+ (match_operand 0 "memory_operand"))))
+
+(define_predicate "gcn_valu_dst_operand"
+ (ior (match_operand 0 "register_operand")
+ (match_operand 0 "gcn_ds_memory_operand")))
+
+(define_predicate "gcn_valu_src0_operand"
+ (ior (match_operand 0 "register_operand")
+ (ior (match_operand 0 "gcn_32bit_immediate_operand")
+ (match_operand 0 "gcn_ds_memory_operand"))))
+
+(define_predicate "gcn_valu_src1_operand"
+ (match_operand 0 "register_operand"))
+
+(define_predicate "gcn_valu_src1com_operand"
+ (ior (match_operand 0 "register_operand")
+ (match_operand 0 "gcn_32bit_immediate_operand")))
+
+(define_predicate "gcn_conditional_operator"
+ (match_code "eq,ne"))
+
+(define_predicate "gcn_compare_64bit_operator"
+ (match_code "eq,ne"))
+
+(define_predicate "gcn_compare_operator"
+ (match_code "eq,ne,gt,ge,lt,le,gtu,geu,ltu,leu"))
+
+(define_predicate "vec_and_scalar_commutative_64bit_operator"
+ (match_code "and,ior,xor"))
+
+(define_predicate "vec_and_scalar_64bit_operator"
+ (match_code "and,ior,xor,ashift,lshiftrt,ashiftrt"))
+
+(define_predicate "binary_operator"
+ (match_code "and,ior,xor,ashift,lshiftrt,ashiftrt"))
+
+(define_predicate "gcn_register_or_unspec_operand"
+ (ior (match_operand 0 "register_operand")
+ (and (match_code "unspec")
+ (match_test "XINT (op, 1) == UNSPEC_VECTOR"))))
+
+(define_predicate "gcn_register_ds_or_unspec_operand"
+ (ior (match_operand 0 "register_operand")
+ (ior (match_operand 0 "gcn_ds_memory_operand")
+ (and (match_code "unspec")
+ (match_test "XINT (op, 1) == UNSPEC_VECTOR")))))
+
+(define_predicate "gcn_buffer_memory_operand"
+ (and (match_code "mem")
+ (and (match_test "MEM_ADDR_SPACE (op) == ADDR_SPACE_SCRATCH")
+ (match_operand 0 "memory_operand"))))
diff --git a/gcc/config/gcn/t-gcn-elf b/gcc/config/gcn/t-gcn-elf
new file mode 100644
index 00000000000..2b378e2f8eb
--- /dev/null
+++ b/gcc/config/gcn/t-gcn-elf
@@ -0,0 +1,21 @@
+# Copyright (C) 2016-2017 Free Software Foundation, Inc.
+#
+# This file is free software; you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the Free
+# Software Foundation; either version 3 of the License, or (at your option)
+# any later version.
+#
+# This file is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+# for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+# Build rule for the GCN-specific C-family hooks (gcn-c.o).
+
+gcn-c.o: $(srcdir)/config/gcn/gcn-c.c
+ $(COMPILE) $<
+ $(POSTCOMPILE)
diff --git a/gcc/emit-rtl.c b/gcc/emit-rtl.c
index 6951f61703b..e86c7ba4f4b 100644
--- a/gcc/emit-rtl.c
+++ b/gcc/emit-rtl.c
@@ -1688,6 +1688,9 @@ get_mem_align_offset (rtx mem, unsigned int align)
tree expr;
unsigned HOST_WIDE_INT offset;
+ if (MEM_ALIGN (mem) >= align)
+ return 0;
+
/* This function can't use
if (!MEM_EXPR (mem) || !MEM_OFFSET_KNOWN_P (mem)
|| (MAX (MEM_ALIGN (mem),
diff --git a/gcc/explow.c b/gcc/explow.c
index 50074e281ed..b38664c0e36 100644
--- a/gcc/explow.c
+++ b/gcc/explow.c
@@ -401,7 +401,14 @@ memory_address_addr_space (machine_mode mode, rtx x, addr_space_t as)
/* By passing constant addresses through registers
we get a chance to cse them. */
if (! cse_not_expected && CONSTANT_P (x) && CONSTANT_ADDRESS_P (x))
- x = force_reg (address_mode, x);
+ {
+ x = force_reg (address_mode, x);
+	/* Usually a register is a valid memory address.  For GCN,
+	   scalar registers are not always a valid way to address memory
+	   and need to be converted to vector pointers.  */
+ if (!memory_address_addr_space_p (mode, x, as))
+ return memory_address_addr_space (mode, x, as);
+ }
/* We get better cse by rejecting indirect addressing at this stage.
Let the combiner create indirect addresses where appropriate.
diff --git a/gcc/expr.c b/gcc/expr.c
index 0e8216ba7d0..8abee9434bd 100644
--- a/gcc/expr.c
+++ b/gcc/expr.c
@@ -10458,6 +10458,8 @@ expand_expr_real_1 (tree exp, rtx target, machine_mode tmode,
VOIDmode,
modifier == EXPAND_SUM ? EXPAND_NORMAL : modifier,
NULL, true);
+#if 0
+ /* FIXME: Deal with this in another way. */
/* If the field has a mode, we want to access it in the
field's mode, not the computed mode.
@@ -10470,6 +10472,7 @@ expand_expr_real_1 (tree exp, rtx target, machine_mode tmode,
else if (GET_MODE (op0) == VOIDmode)
op0 = adjust_address (op0, BLKmode, 0);
}
+#endif
mode2
= CONSTANT_P (op0) ? TYPE_MODE (TREE_TYPE (tem)) : GET_MODE (op0);
diff --git a/gcc/ira-costs.c b/gcc/ira-costs.c
index 2cd102a0810..8c6c1346f3b 100644
--- a/gcc/ira-costs.c
+++ b/gcc/ira-costs.c
@@ -2145,7 +2145,13 @@ setup_allocno_class_and_costs (void)
if (num < 0)
{
num = cost_classes_ptr->hard_regno_index[hard_regno];
- ira_assert (num >= 0);
+	      /* If the class cannot hold a register of the given mode,
+		 we do not care.  */
+ if (num == -1)
+ {
+ reg_costs[j] = 0;
+ continue;
+ }
}
reg_costs[j] = COSTS (costs, i)->cost[num];
}
diff --git a/gcc/ira.c b/gcc/ira.c
index 08a1cc550b2..f91fcb9e5fc 100644
--- a/gcc/ira.c
+++ b/gcc/ira.c
@@ -1637,14 +1637,16 @@ ira_init_register_move_cost (machine_mode mode)
*p2 != LIM_REG_CLASSES; p2++)
if (ira_class_hard_regs_num[*p2] > 0
&& (ira_reg_class_max_nregs[*p2][mode]
- <= ira_class_hard_regs_num[*p2]))
+ <= ira_class_hard_regs_num[*p2])
+ && contains_reg_of_mode[*p2][mode])
cost = MAX (cost, ira_register_move_cost[mode][cl1][*p2]);
for (p1 = &reg_class_subclasses[cl1][0];
*p1 != LIM_REG_CLASSES; p1++)
if (ira_class_hard_regs_num[*p1] > 0
&& (ira_reg_class_max_nregs[*p1][mode]
- <= ira_class_hard_regs_num[*p1]))
+ <= ira_class_hard_regs_num[*p1])
+ && contains_reg_of_mode[*p1][mode])
cost = MAX (cost, ira_register_move_cost[mode][*p1][cl2]);
ira_assert (cost <= 65535);
diff --git a/gcc/lra-constraints.c b/gcc/lra-constraints.c
index b1d864fb974..13d2bf2e0a1 100644
--- a/gcc/lra-constraints.c
+++ b/gcc/lra-constraints.c
@@ -2657,7 +2657,8 @@ process_alt_operands (int only_alternative)
constant into memory and it will then win since
we don't want to have a different alternative
match then. */
- if (! (REG_P (op) && REGNO (op) >= FIRST_PSEUDO_REGISTER))
+ if (! (REG_P (op) && REGNO (op) >= FIRST_PSEUDO_REGISTER)
+ && !CONSTANT_P (op))
{
if (lra_dump_file != NULL)
fprintf
@@ -2754,8 +2755,10 @@ process_alt_operands (int only_alternative)
#endif
/* Input reloads can be inherited more often than output
reloads can be removed, so penalize output
- reloads. */
- if (!REG_P (op) || curr_static_id->operand[nop].type != OP_IN)
+ reloads and also input reloads that are not constants or
+ registers. */
+ if ((!REG_P (op) && !CONSTANT_P (op))
+ || curr_static_id->operand[nop].type != OP_IN)
{
if (lra_dump_file != NULL)
fprintf
@@ -4170,6 +4173,7 @@ curr_insn_transform (bool check_only_p)
&& (curr_insn_set == NULL_RTX
|| !((REG_P (SET_SRC (curr_insn_set))
|| MEM_P (SET_SRC (curr_insn_set))
+ || GET_CODE (SET_SRC (curr_insn_set)) == CONST_INT
|| GET_CODE (SET_SRC (curr_insn_set)) == SUBREG)
&& (REG_P (SET_DEST (curr_insn_set))
|| MEM_P (SET_DEST (curr_insn_set))
diff --git a/gcc/lra-int.h b/gcc/lra-int.h
index 405071708b1..5a519b02492 100644
--- a/gcc/lra-int.h
+++ b/gcc/lra-int.h
@@ -189,9 +189,9 @@ struct lra_static_insn_data
int commutative;
/* Number of operands, duplications, and alternatives of the
insn. */
- char n_operands;
- char n_dups;
- char n_alternatives;
+ unsigned char n_operands;
+ unsigned char n_dups;
+ unsigned char n_alternatives;
/* Insns in machine description (or clobbers in asm) may contain
explicit hard regs which are not operands. The following list
describes such hard registers. */
diff --git a/gcc/print-rtl.c b/gcc/print-rtl.c
index dc8d9800de5..7a9acfc9a73 100644
--- a/gcc/print-rtl.c
+++ b/gcc/print-rtl.c
@@ -343,7 +343,20 @@ rtx_writer::print_rtx_operand_codes_E_and_V (const_rtx in_rtx, int idx)
m_sawclose = 1;
for (int j = 0; j < XVECLEN (in_rtx, idx); j++)
- print_rtx (XVECEXP (in_rtx, idx, j));
+ {
+ int j1;
+
+ print_rtx (XVECEXP (in_rtx, idx, j));
+ for (j1 = j + 1; j1 < XVECLEN (in_rtx, idx); j1++)
+ if (XVECEXP (in_rtx, idx, j) != XVECEXP (in_rtx, idx, j1))
+ break;
+
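+	  /* Runs of identical (pointer-equal) elements are collapsed:
+	     e.g. a 64-element vector of zeros prints as
+	     "(const_int 0) repeated 64x" instead of 64 copies.  */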
+	  if (j1 != j + 1)
+	    {
+	      fprintf (m_outfile, " repeated %ix", j1 - j);
+	      /* Step to the last element of the run; the loop increment
+		 then resumes at the first differing element.  */
+	      j = j1 - 1;
+	    }
+ }
m_indent -= 2;
}
diff --git a/gcc/recog.h b/gcc/recog.h
index 07c60feffc6..469e5ab1109 100644
--- a/gcc/recog.h
+++ b/gcc/recog.h
@@ -224,26 +224,16 @@ struct recog_data_d
/* Gives the operand number that was duplicated in the Nth
duplicate-appearance of an operand. */
- char dup_num[MAX_DUP_OPERANDS];
-
- /* ??? Note that these are `char' instead of `unsigned char' to (try to)
- avoid certain lossage from K&R C, wherein `unsigned char' default
- promotes to `unsigned int' instead of `int' as in ISO C. As of 1999,
- the most common places to bootstrap from K&R C are SunOS and HPUX,
- both of which have signed characters by default. The only other
- supported natives that have both K&R C and unsigned characters are
- ROMP and Irix 3, and neither have been seen for a while, but do
- continue to consider unsignedness when performing arithmetic inside
- a comparison. */
+ unsigned char dup_num[MAX_DUP_OPERANDS];
/* The number of operands of the insn. */
- char n_operands;
+ unsigned char n_operands;
/* The number of MATCH_DUPs in the insn. */
- char n_dups;
+ unsigned char n_dups;
/* The number of alternatives in the constraints for the insn. */
- char n_alternatives;
+ unsigned char n_alternatives;
/* True if insn is ASM_OPERANDS. */
bool is_asm;
@@ -368,10 +358,10 @@ struct insn_data_d
const insn_gen_fn genfun;
const struct insn_operand_data *const operand;
- const char n_generator_args;
- const char n_operands;
- const char n_dups;
- const char n_alternatives;
+ const unsigned char n_generator_args;
+ const unsigned char n_operands;
+ const unsigned char n_dups;
+ const unsigned char n_alternatives;
const char output_format;
};
diff --git a/gcc/reload1.c b/gcc/reload1.c
index e993749a000..9292a7c1fd5 100644
--- a/gcc/reload1.c
+++ b/gcc/reload1.c
@@ -3604,7 +3604,7 @@ elimination_costs_in_insn (rtx_insn *insn)
rtx old_set = single_set (insn);
int i;
rtx orig_operand[MAX_RECOG_OPERANDS];
- rtx orig_dup[MAX_RECOG_OPERANDS];
+ rtx orig_dup[MAX_DUP_OPERANDS];
struct elim_table *ep;
rtx plus_src, plus_cst_src;
bool sets_reg_p;
diff --git a/gcc/simplify-rtx.c b/gcc/simplify-rtx.c
index ef414797af5..06a3f3cd718 100644
--- a/gcc/simplify-rtx.c
+++ b/gcc/simplify-rtx.c
@@ -5391,6 +5391,65 @@ simplify_cond_clz_ctz (rtx x, rtx_code cmp_code, rtx true_val, rtx false_val)
return NULL_RTX;
}
+/* X is operand number OP of a VEC_MERGE operation whose mask is MASK.
+   Try to simplify X using the knowledge that lanes not selected by
+   MASK will not be used.  */
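+
+/* For example, with OP == 0,
+
+     (plus:V4SI (vec_merge:V4SI A B MASK) C)
+
+   simplifies to (plus:V4SI A C), because only the lanes of operand 0
+   selected by MASK are live.  (Illustrative sketch; A, B and C stand
+   for arbitrary V4SI operands.)  */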
+
+rtx
+simplify_merge_mask (rtx x, rtx mask, int op)
+{
+ gcc_assert (VECTOR_MODE_P (GET_MODE (x)));
+ int nunits = GET_MODE_NUNITS (GET_MODE (x));
+ if (GET_CODE (x) == VEC_MERGE && rtx_equal_p (XEXP (x, 2), mask))
+ {
+ if (!side_effects_p (XEXP (x, 1 - op)))
+ return XEXP (x, op);
+ }
+ if (side_effects_p (x))
+ return NULL_RTX;
+ if (UNARY_P (x)
+ && VECTOR_MODE_P (GET_MODE (XEXP (x, 0)))
+ && GET_MODE_NUNITS (GET_MODE (XEXP (x, 0))) == nunits)
+ {
+ rtx top0 = simplify_merge_mask (XEXP (x, 0), mask, op);
+ if (top0)
+ return simplify_gen_unary (GET_CODE (x), GET_MODE (x), top0,
+ GET_MODE (XEXP (x, 0)));
+ }
+ if (BINARY_P (x)
+ && VECTOR_MODE_P (GET_MODE (XEXP (x, 0)))
+ && GET_MODE_NUNITS (GET_MODE (XEXP (x, 0))) == nunits
+ && VECTOR_MODE_P (GET_MODE (XEXP (x, 1)))
+ && GET_MODE_NUNITS (GET_MODE (XEXP (x, 1))) == nunits)
+ {
+ rtx top0 = simplify_merge_mask (XEXP (x, 0), mask, op);
+ rtx top1 = simplify_merge_mask (XEXP (x, 1), mask, op);
+ if (top0 || top1)
+ return simplify_gen_binary (GET_CODE (x), GET_MODE (x),
+ top0 ? top0 : XEXP (x, 0),
+ top1 ? top1 : XEXP (x, 1));
+ }
+ if (GET_RTX_CLASS (GET_CODE (x)) == RTX_TERNARY
+ && VECTOR_MODE_P (GET_MODE (XEXP (x, 0)))
+ && GET_MODE_NUNITS (GET_MODE (XEXP (x, 0))) == nunits
+ && VECTOR_MODE_P (GET_MODE (XEXP (x, 1)))
+ && GET_MODE_NUNITS (GET_MODE (XEXP (x, 1))) == nunits
+ && VECTOR_MODE_P (GET_MODE (XEXP (x, 2)))
+ && GET_MODE_NUNITS (GET_MODE (XEXP (x, 2))) == nunits)
+ {
+ rtx top0 = simplify_merge_mask (XEXP (x, 0), mask, op);
+ rtx top1 = simplify_merge_mask (XEXP (x, 1), mask, op);
+ rtx top2 = simplify_merge_mask (XEXP (x, 2), mask, op);
+      if (top0 || top1 || top2)
+ return simplify_gen_ternary (GET_CODE (x), GET_MODE (x),
+ GET_MODE (XEXP (x, 0)),
+ top0 ? top0 : XEXP (x, 0),
+ top1 ? top1 : XEXP (x, 1),
+ top2 ? top2 : XEXP (x, 2));
+ }
+ return NULL_RTX;
+}
+
/* Simplify CODE, an operation with result mode MODE and three operands,
OP0, OP1, and OP2. OP0_MODE was the mode of OP0 before it became
@@ -5672,6 +5731,28 @@ simplify_ternary_operation (enum rtx_code code, machine_mode mode,
&& !side_effects_p (op2) && !side_effects_p (op1))
return op0;
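+      /* Use the mask to discard values in either arm that cannot be
+	 selected.  */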
+ if (!side_effects_p (op2))
+ {
+ rtx top0 = simplify_merge_mask (op0, op2, 0);
+ rtx top1 = simplify_merge_mask (op1, op2, 1);
+ if (top0 || top1)
+ return simplify_gen_ternary (code, mode, mode,
+ top0 ? top0 : op0,
+ top1 ? top1 : op1, op2);
+ }
+
+ if (GET_CODE (op0) == VEC_MERGE
+ && rtx_equal_p (op2, XEXP (op0, 2))
+ && !side_effects_p (XEXP (op0, 1)) && !side_effects_p (op2))
+ return simplify_gen_ternary (code, mode, mode,
+ XEXP (op0, 0), op1, op2);
+
+  if (GET_CODE (op1) == VEC_MERGE
+      && rtx_equal_p (op2, XEXP (op1, 2))
+      && !side_effects_p (XEXP (op1, 0)) && !side_effects_p (op2))
+    return simplify_gen_ternary (code, mode, mode,
+				 op0, XEXP (op1, 1), op2);
+
break;
default: