-rwxr-xr-x | config.sub | 9
-rw-r--r-- | gcc/combine.c | 12
-rw-r--r-- | gcc/common/config/gcn/gcn-common.c | 29
-rw-r--r-- | gcc/config.gcc | 27
-rw-r--r-- | gcc/config/gcn/constraints.md | 129
-rw-r--r-- | gcc/config/gcn/gcn-builtins.def | 69
-rw-r--r-- | gcc/config/gcn/gcn-c.c | 24
-rw-r--r-- | gcc/config/gcn/gcn-hsa.h | 54
-rw-r--r-- | gcc/config/gcn/gcn-modes.def | 84
-rw-r--r-- | gcc/config/gcn/gcn-opts.h | 27
-rw-r--r-- | gcc/config/gcn/gcn-protos.h | 87
-rw-r--r-- | gcc/config/gcn/gcn-valu.md | 982
-rw-r--r-- | gcc/config/gcn/gcn.c | 2905
-rw-r--r-- | gcc/config/gcn/gcn.h | 718
-rw-r--r-- | gcc/config/gcn/gcn.md | 1068
-rw-r--r-- | gcc/config/gcn/gcn.opt | 40
-rw-r--r-- | gcc/config/gcn/predicates.md | 167
-rw-r--r-- | gcc/config/gcn/t-gcn-elf | 21
-rw-r--r-- | gcc/emit-rtl.c | 3
-rw-r--r-- | gcc/explow.c | 9
-rw-r--r-- | gcc/expr.c | 3
-rw-r--r-- | gcc/ira-costs.c | 8
-rw-r--r-- | gcc/ira.c | 6
-rw-r--r-- | gcc/lra-constraints.c | 10
-rw-r--r-- | gcc/lra-int.h | 6
-rw-r--r-- | gcc/print-rtl.c | 15
-rw-r--r-- | gcc/recog.h | 26
-rw-r--r-- | gcc/reload1.c | 2
-rw-r--r-- | gcc/simplify-rtx.c | 81 |
29 files changed, 6590 insertions, 31 deletions
diff --git a/config.sub b/config.sub index 62b82599d98..94cc4d93d90 100755 --- a/config.sub +++ b/config.sub @@ -263,6 +263,7 @@ case $basic_machine in | fido | fr30 | frv | ft32 \ | h8300 | h8500 | hppa | hppa1.[01] | hppa2.0 | hppa2.0[nw] | hppa64 \ | hexagon \ + | amdgcn \ | i370 | i860 | i960 | ia64 \ | ip2k | iq2000 \ | k1om \ @@ -671,6 +672,9 @@ case $basic_machine in fx2800) basic_machine=i860-alliant ;; + amdgcn) + basic_machine=amdgcn-unknown + ;; genix) basic_machine=ns32k-ns ;; @@ -1543,6 +1547,8 @@ case $os in ;; -ios) ;; + -amdhsa) + ;; -none) ;; *) @@ -1571,6 +1577,9 @@ case $basic_machine in spu-*) os=-elf ;; + amdgcn-*) + os=-amdhsa + ;; *-acorn) os=-riscix1.2 ;; diff --git a/gcc/combine.c b/gcc/combine.c index 8dc62b57266..e47a4f20a62 100644 --- a/gcc/combine.c +++ b/gcc/combine.c @@ -8441,7 +8441,12 @@ gen_lowpart_or_truncate (machine_mode mode, rtx x) { /* Bit-cast X into an integer mode. */ if (!SCALAR_INT_MODE_P (GET_MODE (x))) - x = gen_lowpart (int_mode_for_mode (GET_MODE (x)), x); + { + enum machine_mode imode = int_mode_for_mode (GET_MODE (x)); + if (imode == BLKmode) + return gen_rtx_CLOBBER (mode, const0_rtx); + x = gen_lowpart (imode, x); + } x = simplify_gen_unary (TRUNCATE, int_mode_for_mode (mode), x, GET_MODE (x)); } @@ -11446,6 +11451,11 @@ gen_lowpart_for_combine (machine_mode omode, rtx x) if (omode == imode) return x; + /* This can happen when there is no integer mode corresponding + to a size of vector mode. */ + if (omode == BLKmode) + goto fail; + /* We can only support MODE being wider than a word if X is a constant integer or has a mode the same size. */ if (GET_MODE_SIZE (omode) > UNITS_PER_WORD diff --git a/gcc/common/config/gcn/gcn-common.c b/gcc/common/config/gcn/gcn-common.c new file mode 100644 index 00000000000..6cafc371b26 --- /dev/null +++ b/gcc/common/config/gcn/gcn-common.c @@ -0,0 +1,29 @@ +/* Common hooks for GCN + Copyright (C) 2016-2017 Free Software Foundation, Inc. + + This file is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your option) + any later version. + + This file is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. 
*/ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "common/common-target.h" +#include "common/common-target-def.h" +#include "opts.h" +#include "flags.h" +#include "params.h" + + +struct gcc_targetm_common targetm_common = TARGETM_COMMON_INITIALIZER; diff --git a/gcc/config.gcc b/gcc/config.gcc index fdf4cb845ae..215784030e3 100644 --- a/gcc/config.gcc +++ b/gcc/config.gcc @@ -311,6 +311,9 @@ alpha*-*-*) cpu_type=alpha extra_options="${extra_options} g.opt" ;; +amdgcn*) + cpu_type=gcn + ;; am33_2.0-*-linux*) cpu_type=mn10300 ;; @@ -1258,6 +1261,12 @@ ft32-*-elf) tm_file="dbxelf.h elfos.h newlib-stdint.h ${tm_file}" tmake_file="${tmake_file} ft32/t-ft32" ;; +amdgcn-*-amdhsa) + tm_file="dbxelf.h elfos.h gcn/gcn-hsa.h gcn/gcn.h newlib-stdint.h" + tmake_file="gcn/t-gcn-hsa" + native_system_header_dir=/include + extra_modes=gcn/gcn-modes.def + ;; moxie-*-elf) gas=yes gnu_ld=yes @@ -3928,6 +3937,24 @@ case "${target}" in esac ;; + amdgcn-*-*) + supported_defaults="arch tune" + + for which in arch tune; do + eval "val=\$with_$which" + case ${val} in + "" | carrizo | fiji) + # OK + ;; + *) + echo "Unknown cpu used in --with-$which=$val." 1>&2 + exit 1 + ;; + esac + done + [ "x$with_arch" = x ] && with_arch=carrizo + ;; + hppa*-*-*) supported_defaults="arch schedule" diff --git a/gcc/config/gcn/constraints.md b/gcc/config/gcn/constraints.md new file mode 100644 index 00000000000..5ae42313652 --- /dev/null +++ b/gcc/config/gcn/constraints.md @@ -0,0 +1,129 @@ +;; Constraint definitions for GCN. +;; Copyright (C) 2016-2017 Free Software Foundation, Inc. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. + +(define_constraint "I" + "Inline integer constant" + (and (match_code "const_int") + (match_test "ival >= -16 && ival <= 64"))) + +(define_constraint "J" + "Signed integer 16-bit inline constant" + (and (match_code "const_int") + (match_test "((unsigned HOST_WIDE_INT) ival + 0x8000) < 0x10000"))) + +(define_constraint "K" + "Integer 32-bit constant" + (and (match_code "const_int") + (match_test "trunc_int_for_mode (ival, SImode) == ival"))) + +(define_constraint "O" + "Integer one constant" + (and (match_code "const_int") + (match_test "ival == 1"))) + +(define_constraint "G" + "Inline floating point constant." + (and (match_code "const_double") + (match_test "gcn_inline_fp_constant_p (op, false) > 0"))) + +(define_constraint "H" + "floating point constant representable as inline or 32bit immediate." 
+ (and (match_code "const_double") + (match_test "gcn_fp_constant_p (op, false) > 0"))) + +(define_constraint "A" + "Inline immediate parameter" + (and (match_code "const_int,const_double,const_vector") + (match_test "gcn_inline_constant_p (op)"))) + +(define_constraint "B" + "Inline immediate parameter" + (and (match_code "const_int,const_double,const_vector") + (match_test "gcn_constant_p (op)"))) + +(define_constraint "U" + "unspecified value" + (match_code "unspec")) + +(define_register_constraint "v" "VGPR_REGS" + "VGPR registers") + +(define_register_constraint "Sg" "SGPR_REGS" + "SGPR registers") + +(define_register_constraint "SD" "SGPR_DST_REGS" + "registers useable as a destination of scalar operation") + +(define_register_constraint "SS" "SGPR_SRC_REGS" + "registers useable as a source of scalar operation") + +(define_register_constraint "Sm" "SGPR_MEM_SRC_REGS" + "registers useable as a source of scalar memory operation") + +(define_register_constraint "Sv" "SGPR_VOP3A_SRC_REGS" + "registers useable as a source of VOP3A instruction") + +(define_register_constraint "ca" "ALL_CONDITIONAL_REGS" + "SCC VCCZ or EXECZ") + +(define_register_constraint "cs" "SCC_CONDITIONAL_REG" + "SCC") + +(define_register_constraint "cv" "VCCZ_CONDITIONAL_REG" + "VCCZ") + +(define_register_constraint "cV" "VCC_CONDITIONAL_REG" + "VCC") + +(define_register_constraint "ce" "EXECZ_CONDITIONAL_REG" + "EXECZ") + +(define_register_constraint "e" "EXEC_MASK_REG" + "EXEC") + +(define_memory_constraint "RB" + "Buffer memory address to scratch memory." + (and (match_code "mem") + (match_test "MEM_ADDR_SPACE (op) == ADDR_SPACE_SCRATCH"))) + +(define_memory_constraint "RF" + "Buffer memory address to flat memory." + (and (match_code "mem") + (match_test "MEM_ADDR_SPACE (op) == ADDR_SPACE_FLAT"))) + +(define_memory_constraint "RS" + "Buffer memory address to scalar flat memory." + (and (match_code "mem") + (match_test "MEM_ADDR_SPACE (op) == ADDR_SPACE_SCALAR_FLAT"))) + +(define_memory_constraint "RL" + "Buffer memory address to LDS memory." + (and (match_code "mem") + (match_test "MEM_ADDR_SPACE (op) == ADDR_SPACE_LDS"))) + +(define_memory_constraint "RG" + "Buffer memory address to GDS memory." + (and (match_code "mem") + (match_test "MEM_ADDR_SPACE (op) == ADDR_SPACE_GDS"))) + +(define_memory_constraint "RD" + "Buffer memory address to GDS or LDS memory." + (and (match_code "mem") + (ior (match_test "MEM_ADDR_SPACE (op) == ADDR_SPACE_GDS") + (match_test "MEM_ADDR_SPACE (op) == ADDR_SPACE_LDS")))) diff --git a/gcc/config/gcn/gcn-builtins.def b/gcc/config/gcn/gcn-builtins.def new file mode 100644 index 00000000000..39a883f9e53 --- /dev/null +++ b/gcc/config/gcn/gcn-builtins.def @@ -0,0 +1,69 @@ +/* Copyright (C) 2016-2017 Free Software Foundation, Inc. + + This file is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your option) + any later version. + + This file is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. */ + +/* The first argument to these macros is the return type of the builtin, + the rest are arguments of the builtin. 
*/ +#define _A1(a) {a, GCN_BTI_END_OF_PARAMS} +#define _A2(a,b) {a, b, GCN_BTI_END_OF_PARAMS} +#define _A3(a,b,c) {a, b, c, GCN_BTI_END_OF_PARAMS} +#define _A4(a,b,c,d) {a, b, c, d, GCN_BTI_END_OF_PARAMS} +#define _A5(a,b,c,d,e) {a, b, c, d, e, GCN_BTI_END_OF_PARAMS} + +DEF_BUILTIN (FLAT_LOAD_INT32, 1 /*CODE_FOR_flat_load_v64si*/, + "flat_load_int32", B_INSN, + _A3 (GCN_BTI_V64SI, GCN_BTI_EXEC, GCN_BTI_V64SI), + gcn_expand_builtin_1) + +DEF_BUILTIN (FLAT_LOAD_PTR_INT32, 2 /*CODE_FOR_flat_load_ptr_v64si */, + "flat_load_ptr_int32", B_INSN, + _A4 (GCN_BTI_V64SI, GCN_BTI_EXEC, GCN_BTI_SIPTR, GCN_BTI_V64SI), + gcn_expand_builtin_1) + +DEF_BUILTIN (FLAT_STORE_PTR_INT32, 3 /*CODE_FOR_flat_store_ptr_v64si */, + "flat_store_ptr_int32", B_INSN, + _A5 (GCN_BTI_VOID, GCN_BTI_EXEC, GCN_BTI_SIPTR, GCN_BTI_V64SI, + GCN_BTI_V64SI), + gcn_expand_builtin_1) + +DEF_BUILTIN (FLAT_LOAD_PTR_FLOAT, 2 /*CODE_FOR_flat_load_ptr_v64sf */, + "flat_load_ptr_float", B_INSN, + _A4 (GCN_BTI_V64SF, GCN_BTI_EXEC, GCN_BTI_SFPTR, GCN_BTI_V64SI), + gcn_expand_builtin_1) + +DEF_BUILTIN (FLAT_STORE_PTR_FLOAT, 3 /*CODE_FOR_flat_store_ptr_v64sf */, + "flat_store_ptr_float", B_INSN, + _A5 (GCN_BTI_VOID, GCN_BTI_EXEC, GCN_BTI_SFPTR, GCN_BTI_V64SI, + GCN_BTI_V64SF), + gcn_expand_builtin_1) + +/* DEF_BUILTIN_BINOP_INT_FP creates many variants of a builtin function for a + given operation. The first argument will give base to the identifier of a + particular builtin, the second will be used to form the name of the patter + used to expand it to and the third will be used to create the user-visible + builtin identifier. */ + +DEF_BUILTIN_BINOP_INT_FP (ADD, add, "add") +DEF_BUILTIN_BINOP_INT_FP (SUB, sub, "sub") + +DEF_BUILTIN_BINOP_INT_FP (AND, and, "and") +DEF_BUILTIN_BINOP_INT_FP (IOR, ior, "or") +DEF_BUILTIN_BINOP_INT_FP (XOR, xor, "xor") + +#undef _A1 +#undef _A2 +#undef _A3 +#undef _A4 +#undef _A5 diff --git a/gcc/config/gcn/gcn-c.c b/gcc/config/gcn/gcn-c.c new file mode 100644 index 00000000000..039060b1134 --- /dev/null +++ b/gcc/config/gcn/gcn-c.c @@ -0,0 +1,24 @@ +/* Copyright (C) 2016-2017 Free Software Foundation, Inc. + + This file is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your option) + any later version. + + This file is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "target.h" +#include "c-family/c-common.h" +#include "stringpool.h" +#include "langhooks.h" + diff --git a/gcc/config/gcn/gcn-hsa.h b/gcc/config/gcn/gcn-hsa.h new file mode 100644 index 00000000000..ef05db98d80 --- /dev/null +++ b/gcc/config/gcn/gcn-hsa.h @@ -0,0 +1,54 @@ +/* Copyright (C) 2016-2017 Free Software Foundation, Inc. + + This file is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your option) + any later version. 
+ + This file is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. */ + +#ifndef OBJECT_FORMAT_ELF + #error elf.h included before elfos.h +#endif + +#define TEXT_SECTION_NAME ".AMDGPU.config" + +#define BSS_SECTION_ASM_OP "\t.section .bss" + +#define ASM_OUTPUT_ALIGNED_BSS(FILE, DECL, NAME, SIZE, ALIGN) \ + asm_output_aligned_bss (FILE, DECL, NAME, SIZE, ALIGN) + + +#undef STANDARD_STARTFILE_PREFIX_2 +#define STANDARD_STARTFILE_PREFIX_2 "" + +#undef LOCAL_INCLUDE_DIR + +#undef STARTFILE_SPEC +#define STARTFILE_SPEC "" + +#undef ENDFILE_SPEC +#define ENDFILE_SPEC "" + +#define PREFERRED_DEBUGGING_TYPE DWARF2_DEBUG + +#define DWARF2_DEBUGGING_INFO 1 +#define DWARF2_ASM_LINE_DEBUG_INFO 1 + +#define SET_ASM_OP "\t.set\t" + +#undef TARGET_ASM_NAMED_SECTION +#define TARGET_ASM_NAMED_SECTION default_elf_asm_named_section + +#define EH_FRAME_THROUGH_COLLECT2 1 + +#define LINK_SPEC "" + +#define LIB_SPEC "" diff --git a/gcc/config/gcn/gcn-modes.def b/gcc/config/gcn/gcn-modes.def new file mode 100644 index 00000000000..baeb23f5e2f --- /dev/null +++ b/gcc/config/gcn/gcn-modes.def @@ -0,0 +1,84 @@ +/* Copyright (C) 2016-2017 Free Software Foundation, Inc. + + This file is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your option) + any later version. + + This file is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. */ + +/* Half-precision floating point */ +FLOAT_MODE (HF, 2, 0); +/* FIXME: No idea what format it is. */ +ADJUST_FLOAT_FORMAT (HF, &ieee_half_format); + +/* Native vector modes. 
*/
+VECTOR_MODE (INT, QI, 2); /* V2QI */
+VECTOR_MODE (INT, QI, 4); /* V4QI */
+VECTOR_MODE (INT, QI, 8); /* V8QI */
+VECTOR_MODE (INT, QI, 16); /* V16QI */
+VECTOR_MODE (INT, QI, 32); /* V32QI */
+VECTOR_MODE (INT, QI, 64); /* V64QI */
+VECTOR_MODE (INT, HI, 2); /* V2HI */
+VECTOR_MODE (INT, HI, 4); /* V4HI */
+VECTOR_MODE (INT, HI, 8); /* V8HI */
+VECTOR_MODE (INT, HI, 16); /* V16HI */
+VECTOR_MODE (INT, HI, 32); /* V32HI */
+VECTOR_MODE (INT, HI, 64); /* V64HI */
+VECTOR_MODE (INT, SI, 2); /* V2SI */
+VECTOR_MODE (INT, SI, 4); /* V4SI */
+VECTOR_MODE (INT, SI, 8); /* V8SI */
+VECTOR_MODE (INT, SI, 16); /* V16SI */
+VECTOR_MODE (INT, SI, 32); /* V32SI */
+VECTOR_MODE (INT, SI, 64); /* V64SI */
+VECTOR_MODE (INT, DI, 2); /* V2DI */
+VECTOR_MODE (INT, DI, 4); /* V4DI */
+VECTOR_MODE (INT, DI, 8); /* V8DI */
+VECTOR_MODE (INT, DI, 16); /* V16DI */
+VECTOR_MODE (INT, DI, 32); /* V32DI */
+VECTOR_MODE (INT, DI, 64); /* V64DI */
+VECTOR_MODE (INT, TI, 4); /* V4TI */
+VECTOR_MODE (INT, TI, 8); /* V8TI */
+VECTOR_MODE (INT, TI, 16); /* V16TI */
+VECTOR_MODE (INT, TI, 32); /* V32TI */
+VECTOR_MODE (INT, TI, 64); /* V64TI */
+VECTOR_MODE (FLOAT, HF, 2); /* V2HF */
+VECTOR_MODE (FLOAT, HF, 4); /* V4HF */
+VECTOR_MODE (FLOAT, HF, 8); /* V8HF */
+VECTOR_MODE (FLOAT, HF, 16); /* V16HF */
+VECTOR_MODE (FLOAT, HF, 32); /* V32HF */
+VECTOR_MODE (FLOAT, HF, 64); /* V64HF */
+VECTOR_MODE (FLOAT, SF, 2); /* V2SF */
+VECTOR_MODE (FLOAT, SF, 4); /* V4SF */
+VECTOR_MODE (FLOAT, SF, 8); /* V8SF */
+VECTOR_MODE (FLOAT, SF, 16); /* V16SF */
+VECTOR_MODE (FLOAT, SF, 32); /* V32SF */
+VECTOR_MODE (FLOAT, SF, 64); /* V64SF */
+VECTOR_MODE (FLOAT, DF, 2); /* V2DF */
+VECTOR_MODE (FLOAT, DF, 4); /* V4DF */
+VECTOR_MODE (FLOAT, DF, 8); /* V8DF */
+VECTOR_MODE (FLOAT, DF, 16); /* V16DF */
+VECTOR_MODE (FLOAT, DF, 32); /* V32DF */
+VECTOR_MODE (FLOAT, DF, 64); /* V64DF */
+
+/* Vector units handle reads independently and thus no large alignment
+   needed. */
+ADJUST_ALIGNMENT (V64QI, 1);
+ADJUST_ALIGNMENT (V64HI, 2);
+ADJUST_ALIGNMENT (V64SI, 4);
+ADJUST_ALIGNMENT (V64DI, 8);
+ADJUST_ALIGNMENT (V64TI, 16);
+ADJUST_ALIGNMENT (V64HF, 2);
+ADJUST_ALIGNMENT (V64SF, 4);
+ADJUST_ALIGNMENT (V64DF, 8);
+
+/* Register pairs, triples and quadruples. */
+VECTOR_MODE (INT, SI, 3); /* V3SI */
+VECTOR_MODE (FLOAT, SF, 3); /* V3SF */
diff --git a/gcc/config/gcn/gcn-opts.h b/gcc/config/gcn/gcn-opts.h
new file mode 100644
index 00000000000..d0586d62f87
--- /dev/null
+++ b/gcc/config/gcn/gcn-opts.h
@@ -0,0 +1,27 @@
+/* Copyright (C) 2016-2017 Free Software Foundation, Inc.
+
+   This file is free software; you can redistribute it and/or modify it under
+   the terms of the GNU General Public License as published by the Free
+   Software Foundation; either version 3 of the License, or (at your option)
+   any later version.
+
+   This file is distributed in the hope that it will be useful, but WITHOUT
+   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+   FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+   for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with GCC; see the file COPYING3. If not see
+   <http://www.gnu.org/licenses/>. */
+
+#ifndef GCN_OPTS_H
+#define GCN_OPTS_H
+
+/* Which processor to generate code or schedule for.
*/ +enum processor_type +{ + PROCESSOR_CARRIZO, + PROCESSOR_FIJI +}; + +#endif diff --git a/gcc/config/gcn/gcn-protos.h b/gcc/config/gcn/gcn-protos.h new file mode 100644 index 00000000000..e5dd5280c34 --- /dev/null +++ b/gcc/config/gcn/gcn-protos.h @@ -0,0 +1,87 @@ +/* Copyright (C) 2016-2017 Free Software Foundation, Inc. + + This file is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your option) + any later version. + + This file is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. */ + +#ifndef _GCN_PROTOS_ +#define _GCN_PROTOS_ + +extern bool gcn_hard_regno_mode_ok (int regno, machine_mode mode); +extern int gcn_hard_regno_nregs(int regno, enum machine_mode mode); +extern enum reg_class gcn_regno_reg_class (int regno); +extern bool gcn_cannot_change_mode_class (machine_mode, machine_mode, int); +extern int gcn_inline_fp_constant_p (rtx, bool); +extern bool gcn_fp_constant_p (rtx, bool); +extern bool gcn_inline_constant_p (rtx); +extern bool gcn_constant_p (rtx); +extern bool gcn_vgpr_move_p (rtx, rtx); +extern bool gcn_sgpr_move_p (rtx, rtx); + +extern bool gcn_regno_mode_code_ok_for_base_p + (int, machine_mode, addr_space_t, int, int); +extern reg_class gcn_mode_code_base_reg_class + (machine_mode, addr_space_t, int, int); +extern bool regno_ok_for_index_p (int); +extern void print_operand_address (FILE * file, register rtx addr); +extern void print_operand (FILE * file, rtx x, int code); + +extern rtx gcn_operand_part (machine_mode, rtx, int); +extern rtx gcn_operand_doublepart (machine_mode, rtx, int); +extern void gcn_split_operands (machine_mode, rtx *, int, int); +extern bool gcn_can_split_p (machine_mode, rtx); +extern bool gcn_can_split_operands_p (machine_mode, rtx *, int); +extern rtx gcn_vec_constant (machine_mode, int); +extern rtx gcn_vec_constant (machine_mode, rtx); +extern bool gcn_expand_mov (machine_mode, rtx, rtx); +extern void gcn_expand_vector_init (rtx, rtx); + +extern void gcn_hsa_declare_function_name (FILE *file, + const char *name, tree decl); + +extern rtx gcn_gen_undef (enum machine_mode); +extern rtx gcn_scalar_exec (); +extern rtx gcn_default_exec (); +extern rtx gcn_full_exec (); +extern rtx gcn_full_exec_reg (); + +extern void gcn_expand_prologue (); + +extern int gcn_regmove_natural_size (enum machine_mode); +extern unsigned gcn_frame_pointer_regnum (); + +#ifdef TREE_CODE +extern void gcn_init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree, int); +#endif + +/* Return true if MODE is valid for 1 VGPR register. */ + +inline bool +vgpr_1reg_mode_p (enum machine_mode mode) +{ + return mode == SImode || mode == SFmode || mode == HImode + /*|| mode == V32BImode*/ + || mode == V64HImode || mode == V64SImode + || mode == V64HFmode || mode == V64SFmode || mode == BImode; +} + +/* Return true if MODE is valid for 1 SGPR register. 
*/ + +static inline bool +sgpr_1reg_mode_p (enum machine_mode mode) +{ + return mode == SImode || mode == SFmode || mode == HImode || mode == BImode + /*|| mode == V32BImode*/; +} + +#endif diff --git a/gcc/config/gcn/gcn-valu.md b/gcc/config/gcn/gcn-valu.md new file mode 100644 index 00000000000..894340a46c5 --- /dev/null +++ b/gcc/config/gcn/gcn-valu.md @@ -0,0 +1,982 @@ +;; Copyright (C) 2016-2017 Free Software Foundation, Inc. + +;; This file is free software; you can redistribute it and/or modify it under +;; the terms of the GNU General Public License as published by the Free +;; Software Foundation; either version 3 of the License, or (at your option) +;; any later version. + +;; This file is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +;; for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. + +;; ------------------------------------------------------------------------- +;; Moves +; Vector modes for one vector register +(define_mode_iterator VEC_1REG_MODE + [V64QI V64HI V64SI V64HF V64SF]) + +(define_mode_iterator VEC_1REG_INT_MODE + [V64QI V64HI V64SI]) + +; Vector modes for two vector registers +(define_mode_iterator VEC_2REG_MODE + [V64DI V64DF]) + +; All of above +(define_mode_iterator VEC_REG_MODE + [V64QI V64HI V64SI V64HF V64SF ; Single reg + V64DI V64DF]) ; Double reg + +; Modes supporting integer vector operations +(define_mode_iterator V_INT_MODE [SI V64SI]) + +; Modes we can perform flat memory operations in. +; FIXME: We can also do 96bit, 128bit and 256bit loads into multiple +; registers. Eventually add modes for this. +(define_mode_iterator VEC_FLAT_MODE + [V64QI V64HI V64SI V64HF V64SF ; Single reg + V64DI V64DF]) ; Double reg + +; Modes we can perform scalar flat memory operations in. 
+(define_mode_iterator SCALAR_FLAT_MODE + [BI QI HI HF SI SF ; single regs + DI DF V2SI V2SF ; two regs + V3SI V3SF ; three regs + TI V4SI V4SF V2DI V2DF]) ; four regs + +;; Mapping of full vector modes to shorter vectors +(define_mode_attr scalar_mode + [(V64QI "QI") (V64HI "HI") (V64SI "SI") + (V64HF "HF") (V64SF "SF") (V64DI "DI") (V64DF "DF")]) + +(define_mode_attr v2_mode + [(V64QI "V2QI") (V64HI "V2HI") (V64SI "V2SI") + (V64HF "V2HF") (V64SF "V2SF") (V64DI "V2DI") (V64DF "V2DF")]) + +(define_mode_attr v4_mode + [(V64QI "V4QI") (V64HI "V4HI") (V64SI "V4SI") + (V64HF "V4HF") (V64SF "V4SF") (V64DI "V4DI") (V64DF "V4DF")]) + +(define_mode_attr v8_mode + [(V64QI "V8QI") (V64HI "V8HI") (V64SI "V8SI") + (V64HF "V8HF") (V64SF "V8SF") (V64DI "V8DI") (V64DF "V8DF")]) + +(define_mode_attr v16_mode + [(V64QI "V16QI") (V64HI "V16HI") (V64SI "V16SI") + (V64HF "V16HF") (V64SF "V16SF") (V64DI "V16DI") (V64DF "V16DF")]) + +(define_mode_attr v32_mode + [(V64QI "V32QI") (V64HI "V32HI") (V64SI "V32SI") + (V64HF "V32HF") (V64SF "V32SF") (V64DI "V32DI") (V64DF "V32DF")]) + +(define_insn "*simple_buffer_mov<mode>" + [(set (match_operand:SCALAR_FLAT_MODE 0 "nonimmediate_operand" "=RBm,v") + (match_operand:SCALAR_FLAT_MODE 1 "general_operand" "v, RBm")) + (use (match_operand:TI 2 "register_operand" "Sg, Sg")) + (use (match_operand:DI 3 "gcn_exec_reg_operand" "e, e"))] + "memory_operand (operands[0], VOIDmode) != memory_operand (operands[1], VOIDmode)" + "@ + buffer_store%s1 %1, off, %2, %A0\n\ts_waitcnt vmcnt(0) expcnt(0) + buffer_load%s0 %0, off, %2, %A1\n\ts_waitcnt vmcnt(0)" + [(set_attr "type" "flat") + (set_attr "mode" "<MODE>")]) + +;; This ethernal uglyness makes LRA to sort-of work. +(define_insn_and_split "*ugly_reload_mov<mode>" + [(set (match_operand:SCALAR_FLAT_MODE 0 "nonimmediate_operand" "=RBm,v, ^RBm,^v, SD, SD, v, v , ^v, ^v , v, Sm, RDRF,v") + (match_operand:SCALAR_FLAT_MODE 1 "general_operand" "v, RBm, v, RBm, SSB,SSn,vn, SS, vn, SS, SS, vSS,v ,RDRF")) + (use (match_operand:TI 2 "register_operand" "Sg, Sg, Sg, Sg, Sg, Sg, Sg, Sg, Sg, Sg, Sg, Sg, Sg ,Sg")) + (use (match_operand:DI 3 "gcn_exec_operand" "e ,e ,Sgn, Sgn, SSn,SSn,e ,e ,Sgn, Sgn,SSO,SSO,e ,e")) + (clobber (match_operand:DI 4 "register_operand" "=&Sg,Sg,Sg,Sg,Sg,Sg,Sg,Sg,Sg,Sg,Sg,Sg,Sg,Sg"))] + "(!immediate_operand (operands[1], VOIDmode) || register_operand (operands[0], VOIDmode)) + && (!memory_operand (operands[0], VOIDmode) || !memory_operand (operands[1], VOIDmode))" + "#" + "!memory_operand (operands[0],<MODE>mode) && !memory_operand (operands[1],<MODE>mode) + && !gcn_vgpr_register_operand (operands[0], <MODE>mode) + && !gcn_vgpr_register_operand (operands[1], <MODE>mode)" + [(set (match_dup 0) (match_dup 1))] +{} + [(set_attr "type" "flat") + (set_attr "mode" "<MODE>")]) + +(define_split + [(set (match_operand:SCALAR_FLAT_MODE 0 "nonimmediate_operand") + (match_operand:SCALAR_FLAT_MODE 1 "general_operand")) + (use (match_operand:TI 2 "register_operand")) + (use (match_operand:DI 3 "gcn_exec_operand")) + (clobber (match_scratch:DI 4 ""))] + "REG_P (operands[3]) && REGNO (operands[3]) == EXEC_REG + && (memory_operand (operands[0], VOIDmode) || memory_operand (operands[1], VOIDmode))" + [(parallel [(set (match_dup 0) (match_dup 1)) + (use (match_dup 2)) + (use (match_dup 3))])]) + +(define_split + [(set (match_operand:SCALAR_FLAT_MODE 0 "nonimmediate_operand") + (match_operand:SCALAR_FLAT_MODE 1 "general_operand")) + (use (match_operand:TI 2 "register_operand")) + (use (match_operand:DI 3 "gcn_exec_operand")) + (clobber 
(match_scratch:DI 4 ""))] + "REG_P (operands[3]) && REGNO (operands[3]) == EXEC_REG" + [(parallel [(set (match_dup 0) (match_dup 1)) + (use (match_dup 3))])]) + +(define_split + [(set (match_operand:SCALAR_FLAT_MODE 0 "nonimmediate_operand") + (match_operand:SCALAR_FLAT_MODE 1 "general_operand")) + (use (match_operand:TI 2 "register_operand")) + (use (match_operand:DI 3 "gcn_exec_operand")) + (clobber (match_operand:DI 4 "register_operand"))] + "(memory_operand (operands[0], VOIDmode) || memory_operand (operands[1], VOIDmode))" + [(set (match_dup 4) (reg:DI EXEC_REG)) + (set (reg:DI EXEC_REG) (match_dup 3)) + (parallel [(set (match_dup 0) (match_dup 1)) + (use (match_dup 2)) + (use (reg:DI EXEC_REG))]) + (set (reg:DI EXEC_REG) (match_dup 4))]) + +(define_split + [(set (match_operand:SCALAR_FLAT_MODE 0 "nonimmediate_operand") + (match_operand:SCALAR_FLAT_MODE 1 "general_operand")) + (use (match_operand:TI 2 "register_operand")) + (use (match_operand:DI 3 "gcn_exec_operand")) + (clobber (match_operand:DI 4 "register_operand"))] + "" + [(set (match_dup 4) (reg:DI EXEC_REG)) + (set (reg:DI EXEC_REG) (match_dup 3)) + (parallel [(set (match_dup 0) (match_dup 1)) + (use (reg:DI EXEC_REG))]) + (set (reg:DI EXEC_REG) (match_dup 4))]) + +(define_insn "*mov<mode>" + [(set (match_operand:VEC_1REG_MODE 0 "nonimmediate_operand" "=v,RD,RF,v ,v") + (vec_merge:VEC_1REG_MODE + (match_operand:VEC_1REG_MODE 1 "gcn_load_operand" "vB,v ,v,RD,RF") + (match_operand:VEC_1REG_MODE 3 "gcn_register_or_unspec_operand" "0U,0U,0U,0U,0U") + (match_operand:DI 2 "gcn_exec_reg_operand" "e,e,e,e,e")))] + "!memory_operand (operands[0], VOIDmode) || register_operand (operands[1], VOIDmode)" + "@ + v_mov_b32\t%0, %1 + ds_write_b32\t%A0, %1%O0 + flat_store%s1\t%A0, %1 + ds_read_b32\t%0, %A1%O1 + flat_load%s0\t%0, %A1\n\ts_waitcnt\tlgkmcnt(0),vmcnt(0)" + [(set_attr "type" "vop1,dsmem,flat,dsmem,flat") + (set_attr "mode" "<MODE>")]) + +(define_insn "*mov<mode>" + [(set (match_operand:VEC_2REG_MODE 0 "register_operand" "=&v") + (vec_merge:VEC_2REG_MODE + (match_operand:VEC_2REG_MODE 1 "gcn_alu_operand" "0vB") + (match_operand:VEC_2REG_MODE 3 "gcn_register_or_unspec_operand" "0U") + (match_operand:DI 2 "gcn_exec_reg_operand" "e")))] + "!memory_operand (operands[0], VOIDmode) || register_operand (operands[1], VOIDmode)" + "v_mov_b32\t%L0, %L1\n\tv_mov_b32\t%H0, %H1" + [(set_attr "type" "vop1") + (set_attr "mode" "<MODE>")]) + +; TODO: Add zero/sign extending variants. + +;; ------------------------------------------------------------------------- +;; Vector lane moves +;; ------------------------------------------------------------------------- + +; v_writelane/readlane works regardless of exec flags. +; We allow source to be scratch +(define_insn "*vec_set<mode>" + [(set (match_operand:VEC_1REG_MODE 0 "register_operand" "=v") + (vec_merge:VEC_1REG_MODE + (vec_duplicate:VEC_1REG_MODE + (match_operand:<scalar_mode> 1 "register_operand" "SS")) + (match_operand:VEC_1REG_MODE 3 "gcn_register_or_unspec_operand" "0U") + (ashift (const_int 1) (match_operand:SI 2 "gcn_alu_operand" "SSB"))))] + "" + "v_writelane_b32 %0, %1, %2" + [(set_attr "type" "vop3a") + (set_attr "mode" "<scalar_mode>") + (set_attr "exec" "any")]) + +; FIXME: 64bit operations really should be splitters, but I am not sure how +; to represent vertical subregs. 
+(define_insn "*vec_set<mode>" + [(set (match_operand:VEC_2REG_MODE 0 "register_operand" "=v") + (vec_merge:VEC_2REG_MODE + (vec_duplicate:VEC_2REG_MODE + (match_operand:<scalar_mode> 1 "register_operand" "SS")) + (match_operand:VEC_2REG_MODE 3 "gcn_register_or_unspec_operand" "0U") + (ashift (const_int 1) (match_operand:SI 2 "gcn_alu_operand" "SSB"))))] + "" + "v_writelane_b32 %L0, %L1, %2\n\tv_writelane_b32 %H0, %H1, %2" + [(set_attr "type" "vop3a") + (set_attr "mode" "<scalar_mode>") + (set_attr "exec" "any")]) + +(define_expand "vec_set<mode>" + [(set (match_operand:VEC_1REG_MODE 0 "register_operand" "") + (vec_merge:VEC_1REG_MODE + (vec_duplicate:VEC_1REG_MODE + (match_operand:<scalar_mode> 1 "register_operand" "")) + (match_dup 0) + (ashift (const_int 1) (match_operand:SI 2 "gcn_alu_operand" ""))))] + "") + +(define_insn "*vec_set<mode>_1" + [(set (match_operand:VEC_1REG_MODE 0 "register_operand" "") + (vec_merge:VEC_1REG_MODE + (vec_duplicate:VEC_1REG_MODE + (match_operand:<scalar_mode> 1 "register_operand" "")) + (match_operand:VEC_1REG_MODE 3 "gcn_register_or_unspec_operand" "") + (match_operand:SI 2 "const_int_operand")))] + "((unsigned) exact_log2 (INTVAL (operands[2])) < 64)" +{ + operands[2] = GEN_INT (exact_log2 (INTVAL (operands[2]))); + return "v_writelane_b32 %0, %1, %2"; +} + [(set_attr "type" "vop3a") + (set_attr "mode" "<scalar_mode>") + (set_attr "exec" "any")]) + +(define_insn "*vec_set<mode>_1" + [(set (match_operand:VEC_2REG_MODE 0 "register_operand" "") + (vec_merge:VEC_2REG_MODE + (vec_duplicate:VEC_2REG_MODE + (match_operand:<scalar_mode> 1 "register_operand" "")) + (match_operand:VEC_2REG_MODE 3 "gcn_register_or_unspec_operand" "") + (match_operand:SI 2 "const_int_operand")))] + "((unsigned) exact_log2 (INTVAL (operands[2])) < 64)" +{ + operands[2] = GEN_INT (exact_log2 (INTVAL (operands[2]))); + return "v_writelane_b32 %L0, %L1, %2\nv_writelane_b32 %H0, %H1, %2"; +} + [(set_attr "type" "vop3a") + (set_attr "mode" "<scalar_mode>") + (set_attr "exec" "any")]) + +(define_insn "vec_duplicate<mode>" + [(set (match_operand:VEC_1REG_MODE 0 "register_operand" "=v") + (vec_merge:VEC_1REG_MODE + (vec_duplicate:VEC_1REG_MODE + (match_operand:<scalar_mode> 1 "gcn_alu_operand" "SSB")) + (match_operand:VEC_1REG_MODE 3 "gcn_register_or_unspec_operand" "0U") + (match_operand:DI 2 "gcn_exec_reg_operand" "e")))] + "" + "v_mov_b32\t%0, %1" + [(set_attr "type" "vop3a") + (set_attr "mode" "<scalar_mode>") + (set_attr "exec" "any")]) + +(define_insn "vec_duplicate<mode>" + [(set (match_operand:VEC_2REG_MODE 0 "register_operand" "=v") + (vec_merge:VEC_2REG_MODE + (vec_duplicate:VEC_2REG_MODE + (match_operand:<scalar_mode> 1 "register_operand" "SS")) + (match_operand:VEC_2REG_MODE 3 "gcn_register_or_unspec_operand" "0U") + (match_operand:DI 2 "gcn_exec_reg_operand" "e")))] + "" + "v_mov_b32\t%L0, %L1\n\tv_mov_b32\t%H0, %H1" + [(set_attr "type" "vop3a") + (set_attr "mode" "<scalar_mode>") + (set_attr "exec" "any")]) + +(define_insn "vec_extract<mode>" + [(set (match_operand:<scalar_mode> 0 "register_operand" "=Sm") + (vec_select:<scalar_mode> + (match_operand:VEC_1REG_MODE 1 "register_operand" "v") + (parallel [(match_operand:SI 2 "gcn_alu_operand" "SSB")])))] + "" + "v_readlane_b32 %0, %1, %2" + [(set_attr "type" "vop3a") + (set_attr "mode" "<scalar_mode>") + (set_attr "exec" "any")]) + +(define_insn "vec_extract<mode>" + [(set (match_operand:<scalar_mode> 0 "register_operand" "=Sm") + (vec_select:<scalar_mode> + (match_operand:VEC_2REG_MODE 1 "register_operand" "v") + (parallel 
[(match_operand:SI 2 "gcn_alu_operand" "SSB")])))] + "" + "v_readlane_b32 %L0, %L1, %L2 + v_readlane_b32 %H0, %H1, %H2" + [(set_attr "type" "vop3a") + (set_attr "mode" "<scalar_mode>") + (set_attr "exec" "any")]) + +(define_expand "vec_init<mode>" + [(match_operand:VEC_REG_MODE 0 "register_operand") + (match_operand 1)] + "" +{ + gcn_expand_vector_init (operands[0], operands[1]); + DONE; +}) + +;; ------------------------------------------------------------------------- +;; Generic expanders for vector operations. +;; ------------------------------------------------------------------------- + +(define_mode_iterator V64SIDI [V64SI V64DI]) + +; Integer operations that produce condition code +(define_expand "<expander><mode>3" + [(parallel [ + (set (match_operand:V64SIDI 0 "register_operand" "") + (vec_merge:V64SIDI + (plus_minus:V64SIDI + (match_operand:V64SIDI 1 "register_operand" "") + (match_operand:V64SIDI 2 "gcn_alu_operand" "")) + (match_dup 4) + (match_dup 3))) + (clobber (reg:DI VCC_REG))])] + "" +{ + operands[3] = gcn_full_exec_reg (); + operands[4] = gcn_gen_undef (<MODE>mode); +}) + +(define_mode_iterator VEC_INT_MODE [V64QI V64HI V64SI V64DI]) +(define_code_iterator bitop [and ior xor]) + +(define_expand "<expander><mode>3" + [(set (match_operand:VEC_INT_MODE 0 "gcn_valu_dst_operand" "") + (vec_merge:VEC_INT_MODE + (bitop:VEC_INT_MODE + (match_operand:VEC_INT_MODE 1 "gcn_valu_src0_operand" "") + (match_operand:VEC_INT_MODE 2 "gcn_valu_src1com_operand" "")) + (match_dup 4) + (match_dup 3)))] + "" +{ + operands[3] = gcn_full_exec_reg (); + operands[4] = gcn_gen_undef (<MODE>mode); +}) + +(define_code_iterator shiftop [ashift lshiftrt ashiftrt]) +(define_expand "<expander><mode>3" + [(set (match_operand:VEC_INT_MODE 0 "register_operand" "") + (vec_merge:VEC_INT_MODE + (shiftop:VEC_INT_MODE + (match_operand:VEC_INT_MODE 1 "register_operand" "") + (match_operand:V64SI 2 "gcn_alu_operand" "")) + (match_dup 4) + (match_dup 3)))] + "" +{ + operands[3] = gcn_full_exec_reg (); + operands[4] = gcn_gen_undef (<MODE>mode); +}) + +;; ------------------------------------------------------------------------- +;; ALU special cases: Plus +;; ------------------------------------------------------------------------- + + +; Turn vector pattern into scalar variant. 
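;; (Annotation added for exposition; not part of the submitted patch.)
;; The "vec_to_scalar" define_subst that follows mechanically derives a
;; scalar variant from each vector pattern it is attached to.  Roughly, a
;; template of the form
;;
;;   (set D (vec_merge (op A B) OLD EXEC))   ; plus an optional VCC clobber
;;
;; is rewritten, with <scalar_mode> substituted for the vector mode, into
;;
;;   (set D (op A B))
;;   (use EXEC)                              ; plus the same VCC clobber
;;
;; and the "vec_suffix" subst attribute expands to "vector" for the original
;; pattern and to "scalar" for the substituted one, so a single template such
;; as "addv64si3_<vec_suffix>" yields both insns.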
+ +(define_subst "vec_to_scalar" + [(set (match_operand:VEC_REG_MODE 0) + (vec_merge:VEC_REG_MODE + (match_operator:VEC_REG_MODE 1 "" + [(match_operand:VEC_REG_MODE 2) + (match_operand:VEC_REG_MODE 3)]) + (match_operand:VEC_REG_MODE 4) + (match_operand:DI 5))) + (clobber (reg:DI VCC_REG))] + "" + [(set (match_dup:<scalar_mode> 0) + (match_op_dup:<scalar_mode> 1 + [(match_dup:<scalar_mode> 1) (match_dup:<scalar_mode> 2)])) + (use (match_dup:<scalar_mode> 5)) + (clobber (reg:DI VCC_REG))]) + +(define_subst "vec_to_scalar" + [(set (match_operand:VEC_REG_MODE 0) + (vec_merge:VEC_REG_MODE + (match_operator:VEC_REG_MODE 1 "" + [(match_operand:VEC_REG_MODE 2) + (match_operand:VEC_REG_MODE 3)]) + (match_operand:VEC_REG_MODE 4) + (match_operand:DI 5)))] + "" + [(set (match_dup:<scalar_mode> 0) + (match_op_dup:<scalar_mode> 1 + [(match_dup:<scalar_mode> 1) (match_dup:<scalar_mode> 2)])) + (use (match_dup:<scalar_mode> 5))]) + +(define_subst_attr "vec_suffix" + "vec_to_scalar" "vector" "scalar") + +(define_insn "addv64si3_<vec_suffix>" + [(set (match_operand:V64SI 0 "register_operand" "=v") + (vec_merge:V64SI + (plus:V64SI + (match_operand:V64SI 1 "register_operand" "%v") + (match_operand:V64SI 2 "gcn_alu_operand" " vSSB")) + (match_operand:V64SI 4 "gcn_register_or_unspec_operand" "0U") + (match_operand:DI 3 "gcn_exec_reg_operand" "e"))) + (clobber (reg:DI VCC_REG))] + "" + "v_add_u32\t%0, vcc, %2, %1" + [(set_attr "type" "vop2") + (set_attr "mode" "V64SI")]) + +(define_insn "addv64si3_vector_dup" + [(set (match_operand:V64SI 0 "register_operand" "=v") + (vec_merge:V64SI + (plus:V64SI + (vec_duplicate:V64SI + (match_operand:SI 2 "gcn_alu_operand" "SSB")) + (match_operand:V64SI 1 "register_operand" "v")) + (match_operand:V64SI 4 "gcn_register_or_unspec_operand" "0U") + (match_operand:DI 3 "gcn_exec_reg_operand" "e"))) + (clobber (reg:DI VCC_REG))] + "" + "v_add_u32\t%0, vcc, %1, %2" + [(set_attr "type" "vop2") + (set_attr "mode" "SI")]) + +(define_insn "addv64si3_vector_vcc" + [(set (match_operand:V64SI 0 "register_operand" "=v,v") + (vec_merge:V64SI + (plus:V64SI + (match_operand:V64SI 1 "register_operand" "%v,v") + (match_operand:V64SI 2 "gcn_alu_operand" " vSSB,vSSB")) + (match_operand:V64SI 4 "gcn_register_or_unspec_operand" "0U,0U") + (match_operand:DI 3 "gcn_exec_reg_operand" "e,e"))) + (set (match_operand:DI 5 "register_operand" "=cV,Sg") + (ior:DI (and:DI (ltu:DI (plus:V64SI (match_dup 1) + (match_dup 2)) + (match_dup 1)) + (match_dup 3)) + (and:DI (not:DI (match_dup 3)) + (match_operand:DI 6 "gcn_register_or_unspec_operand" "5U,5U"))))] + "" + "v_add_u32\t%0, %5, %1, %2" + [(set_attr "type" "vop2,vop3b") + (set_attr "mode" "SI")]) + +;; Tom says that he thinks the previous value of VCC is unchanged when +;; execution lane is masked out. 
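;; (Annotation added for exposition; not part of the submitted patch.)
;; In the VCC-writing add patterns above and below, the carry output is
;; modelled as
;;
;;   (ior (and (ltu (plus A B) A) EXEC)
;;        (and (not EXEC) VCC_OLD))
;;
;; that is, each lane enabled in EXEC contributes the unsigned-overflow bit
;; of the addition ((A + B) < A), while lanes masked out by EXEC keep the
;; previous VCC value, matching the assumption stated in the comment above.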
+ +(define_insn "addv64si3_vector_vcc_dup" + [(set (match_operand:V64SI 0 "register_operand" "=v,v") + (vec_merge:V64SI + (plus:V64SI + (vec_duplicate:V64SI (match_operand:SI 2 "gcn_alu_operand" "SSB,SSB")) + (match_operand:V64SI 1 "register_operand" "v,v")) + (match_operand:V64SI 4 "gcn_register_or_unspec_operand" "0U,0U") + (match_operand:DI 3 "gcn_exec_reg_operand" "e,e"))) + (set (match_operand:DI 5 "register_operand" "=cV,Sg") + (ior:DI (and:DI (ltu:DI (plus:V64SI (vec_duplicate:V64SI (match_dup 2)) + (match_dup 1)) + (vec_duplicate:V64SI (match_dup 2))) + (match_dup 3)) + (and:DI (not:DI (match_dup 3)) + (match_operand:DI 6 "gcn_register_or_unspec_operand" "5U,5U"))))] + "" + "v_add_u32\t%0, %5, %1, %2" + [(set_attr "type" "vop2,vop3b") + (set_attr "mode" "SI")]) + +;; This pattern does not accept SGPR because VCC read already counts as a SGPR use +;; and number of SGPR operands is limited to 1. +(define_insn "addcv64si3_vec" + [(set (match_operand:V64SI 0 "register_operand" "=v,v") + (vec_merge:V64SI (plus:V64SI (plus:V64SI + (vec_merge:V64SI + (match_operand:V64SI 7 "gcn_vec1_operand" "A,A") + (match_operand:V64SI 8 "gcn_vec0_operand" "A,A") + (match_operand:DI 5 "register_operand" "cV,cV")) + (match_operand:V64SI 1 "gcn_alu_operand" "%vA,vA")) + (match_operand:V64SI 2 "gcn_alu_operand" "vB,vB")) + (match_operand:V64SI 4 "gcn_register_or_unspec_operand" "0U,0U") + (match_operand:DI 3 "gcn_exec_reg_operand" "e,e"))) + (set (match_operand:DI 6 "register_operand" "=cV,Sg") + (ior:DI (and:DI (ior:DI (ltu:DI (plus:V64SI (plus:V64SI + (vec_merge:V64SI + (match_dup 7) + (match_dup 8) + (match_dup 5)) + (match_dup 1)) + (match_dup 2)) + (match_dup 2)) + (ltu:DI (plus:V64SI (vec_merge:V64SI + (match_dup 7) + (match_dup 8) + (match_dup 5)) + (match_dup 1)) + (match_dup 1))) + (match_dup 3)) + (and:DI (not:DI (match_dup 3)) + (match_operand:DI 9 "gcn_register_or_unspec_operand" "6U,6U"))))] + "" + "v_addc_u32\t%0, %6, %1, %2, vcc" + [(set_attr "type" "vop2,vop3b") + (set_attr "mode" "SI")]) + + +(define_insn "subv64si3_<vec_suffix>" + [(set (match_operand:V64SI 0 "register_operand" "=v,v") + (vec_merge:V64SI + (minus:V64SI + (match_operand:V64SI 1 "gcn_alu_operand" "vSSB,v") + (match_operand:V64SI 2 "gcn_alu_operand" "v,vSSB")) + (match_operand:V64SI 4 "gcn_register_or_unspec_operand" "0U,0U") + (match_operand:DI 3 "gcn_exec_reg_operand" "e,e"))) + (clobber (reg:DI VCC_REG))] + "register_operand (operands[1], VOIDmode) || register_operand (operands[2], VOIDmode)" + "@ + v_sub_u32\t%0, vcc, %2, %1 + v_subrev_u32\t%0, vcc, %2, %1" + [(set_attr "type" "vop2") + (set_attr "mode" "SI")]) + +(define_insn "subv64si3_vector_vcc" + [(set (match_operand:V64SI 0 "register_operand" "=v,v,v,v") + (vec_merge:V64SI + (minus:V64SI + (match_operand:V64SI 1 "gcn_alu_operand" "vSSB,vSSB,v,v") + (match_operand:V64SI 2 "gcn_alu_operand" "v,v,vSSB,vSSB")) + (match_operand:V64SI 4 "gcn_register_or_unspec_operand" "0U,0U,0U,0U") + (match_operand:DI 3 "gcn_exec_reg_operand" "e,e,e,e"))) + (set (match_operand:DI 5 "register_operand" "=cV,Sg,cV,Sg") + (ior:DI (and:DI (gtu:DI (minus:V64SI (match_dup 1) + (match_dup 2)) + (match_dup 1)) + (match_dup 3)) + (and:DI (not:DI (match_dup 3)) + (match_operand:DI 6 "gcn_register_or_unspec_operand" "5U,5U,5U,5U"))))] + "register_operand (operands[1], VOIDmode) || register_operand (operands[2], VOIDmode)" + "@ + v_sub_u32\t%0, %5, %2, %1 + v_sub_u32\t%0, %5, %2, %1 + v_subrev_u32\t%0, %5, %2, %1 + v_subrev_u32\t%0, %5, %2, %1" + [(set_attr "type" "vop2,vop3b,vop2,vop3b") + 
(set_attr "mode" "SI")]) + +;; This pattern does not accept SGPR because VCC read already counts +;; as a SGPR use and number of SGPR operands is limited to 1. +(define_insn "subcv64si3_vec" + [(set (match_operand:V64SI 0 "register_operand" "=v,v,v,v") + (vec_merge:V64SI (minus:V64SI (minus:V64SI + (vec_merge:V64SI + (match_operand:V64SI 7 "gcn_vec1_operand" "A,A,A,A") + (match_operand:V64SI 8 "gcn_vec0_operand" "A,A,A,A") + (match_operand:DI 5 "gcn_alu_operand" "cV,cV,cV,cV")) + (match_operand:V64SI 1 "gcn_alu_operand" "vA,vA,vB,vB")) + (match_operand:V64SI 2 "gcn_alu_operand" "vB,vB,vA,vA")) + (match_operand:V64SI 4 "gcn_register_or_unspec_operand" "0U,0U,0U,0U") + (match_operand:DI 3 "gcn_exec_reg_operand" "e,e,e,e"))) + (set (match_operand:DI 6 "register_operand" "=cV,Sg,cV,Sg") + (ior:DI (and:DI (ior:DI (gtu:DI (minus:V64SI (minus:V64SI + (vec_merge:V64SI + (match_dup 7) + (match_dup 8) + (match_dup 5)) + (match_dup 1)) + (match_dup 2)) + (match_dup 2)) + (ltu:DI (minus:V64SI (vec_merge:V64SI + (match_dup 7) + (match_dup 8) + (match_dup 5)) + (match_dup 1)) + (match_dup 1))) + (match_dup 3)) + (and:DI (not:DI (match_dup 3)) + (match_operand:DI 9 "gcn_register_or_unspec_operand" "6U,6U,6U,6U"))))] + "register_operand (operands[1], VOIDmode) || register_operand (operands[2], VOIDmode)" + "@ + v_sub_u32\t%0, %5, %2, %1 + v_sub_u32\t%0, %5, %2, %1 + v_subrev_u32\t%0, %5, %2, %1 + v_subrev_u32\t%0, %5, %2, %1" + [(set_attr "type" "vop2,vop3b,vop2,vop3b") + (set_attr "mode" "SI")]) + +(define_insn_and_split "*addv64di3_vector" + [(set (match_operand:V64DI 0 "register_operand" "=v") + (vec_merge:V64DI + (plus:V64DI + (match_operand:V64DI 1 "register_operand" "%v") + (match_operand:V64DI 2 "gcn_alu_operand" "vSSB")) + (match_operand:V64DI 4 "gcn_register_or_unspec_operand" "0U") + (match_operand:DI 3 "gcn_exec_reg_operand" "e"))) + (clobber (reg:DI VCC_REG))] + "" + "#" + "gcn_can_split_p (V64DImode, operands[0]) + && gcn_can_split_p (V64DImode, operands[1]) + && gcn_can_split_p (V64DImode, operands[2]) + && gcn_can_split_p (V64DImode, operands[4])" + [(const_int 0)] +{ + rtx vcc = gen_rtx_REG (DImode, VCC_REG); + emit_insn (gen_addv64si3_vector_vcc (gcn_operand_part (V64DImode, operands[0], 0), + gcn_operand_part (V64DImode, operands[1], 0), + gcn_operand_part (V64DImode, operands[2], 0), + operands[3], + gcn_operand_part (V64DImode, operands[4], 0), + vcc, + gcn_gen_undef (DImode))); + emit_insn (gen_addcv64si3_vec (gcn_operand_part (V64DImode, operands[0], 1), + gcn_operand_part (V64DImode, operands[1], 1), + gcn_operand_part (V64DImode, operands[2], 1), + operands[3], + gcn_operand_part (V64DImode, operands[4], 1), + vcc, vcc, gcn_vec_constant (V64SImode, 1), + gcn_vec_constant (V64SImode, 0), + gcn_gen_undef (DImode))); + DONE; +}) + +(define_insn_and_split "subv64di3_vec" + [(set (match_operand:V64DI 0 "register_operand" "=v,v") + (vec_merge:V64DI + (minus:V64DI + (match_operand:V64DI 1 "gcn_alu_operand" "vSSB,v") + (match_operand:V64DI 2 "gcn_alu_operand" "v,vSSB")) + (match_operand:V64DI 4 "gcn_register_or_unspec_operand" "0U,0U") + (match_operand:DI 3 "gcn_exec_reg_operand" "e,e"))) + (clobber (reg:DI VCC_REG))] + "register_operand (operands[1], VOIDmode) || register_operand (operands[2], VOIDmode)" + "#" + "gcn_can_split_p (V64DImode, operands[0]) + && gcn_can_split_p (V64DImode, operands[1]) + && gcn_can_split_p (V64DImode, operands[2]) + && gcn_can_split_p (V64DImode, operands[4])" + [(const_int 0)] +{ + rtx vcc = gen_rtx_REG (DImode, VCC_REG); + emit_insn 
(gen_subv64si3_vector_vcc (gcn_operand_part (V64DImode, operands[0], 0), + gcn_operand_part (V64DImode, operands[1], 0), + gcn_operand_part (V64DImode, operands[2], 0), + operands[3], + gcn_operand_part (V64DImode, operands[4], 0), + vcc, + gcn_gen_undef (DImode))); + emit_insn (gen_subcv64si3_vec (gcn_operand_part (V64DImode, operands[0], 1), + gcn_operand_part (V64DImode, operands[1], 1), + gcn_operand_part (V64DImode, operands[2], 1), + operands[3], + gcn_operand_part (V64DImode, operands[4], 1), + vcc, vcc, gcn_vec_constant (V64SImode, 1), + gcn_vec_constant (V64SImode, 0), + gcn_gen_undef (DImode))); + DONE; +}) + +(define_insn_and_split "addv64di3_zext" + [(set (match_operand:V64DI 0 "register_operand" "=&v,v") + (vec_merge:V64DI + (plus:V64DI + (zero_extend:V64DI + (match_operand:V64SI 1 "gcn_alu_operand" "0vA,0vB")) + (match_operand:V64DI 2 "gcn_alu_operand" "0vB,0vA")) + (match_operand:V64DI 4 "gcn_register_or_unspec_operand" "0U,0U") + (match_operand:DI 3 "gcn_exec_reg_operand" "e,e"))) + (clobber (reg:DI VCC_REG))] + "" + "#" + "gcn_can_split_p (V64DImode, operands[0]) + && gcn_can_split_p (V64DImode, operands[2]) + && gcn_can_split_p (V64DImode, operands[4])" + [(const_int 0)] +{ + rtx vcc = gen_rtx_REG (DImode, VCC_REG); + emit_insn (gen_addv64si3_vector_vcc (gcn_operand_part (V64DImode, operands[0], 0), + operands[1], + gcn_operand_part (V64DImode, operands[2], 0), + operands[3], + gcn_operand_part (V64DImode, operands[4], 0), + vcc, + gcn_gen_undef (DImode))); + emit_insn (gen_addcv64si3_vec (gcn_operand_part (V64DImode, operands[0], 1), + gcn_operand_part (V64DImode, operands[2], 1), + const0_rtx, + operands[3], + gcn_operand_part (V64DImode, operands[4], 1), + vcc, vcc, gcn_vec_constant (V64SImode, 1), + gcn_vec_constant (V64SImode, 0), + gcn_gen_undef (DImode))); + DONE; +}) + +(define_insn_and_split "addv64di3_zext_dup" + [(set (match_operand:V64DI 0 "register_operand" "=&v") + (vec_merge:V64DI + (plus:V64DI + (zero_extend:V64DI + (vec_duplicate:V64SI + (match_operand:SI 1 "gcn_alu_operand" "BSS"))) + (match_operand:V64DI 2 "gcn_alu_operand" "0vA")) + (match_operand:V64DI 4 "gcn_register_or_unspec_operand" "0U") + (match_operand:DI 3 "gcn_exec_reg_operand" "e"))) + (clobber (reg:DI VCC_REG))] + "" + "#" + "gcn_can_split_p (V64DImode, operands[0]) + && gcn_can_split_p (V64DImode, operands[2]) + && gcn_can_split_p (V64DImode, operands[4])" + [(const_int 0)] +{ + rtx vcc = gen_rtx_REG (DImode, VCC_REG); + emit_insn (gen_addv64si3_vector_vcc_dup (gcn_operand_part (V64DImode, operands[0], 0), + gcn_operand_part (DImode, operands[1], 0), + gcn_operand_part (V64DImode, operands[2], 0), + operands[3], + gcn_operand_part (V64DImode, operands[4], 0), + vcc, + gcn_gen_undef (DImode))); + emit_insn (gen_addcv64si3_vec (gcn_operand_part (V64DImode, operands[0], 1), + gcn_operand_part (V64DImode, operands[2], 1), + const0_rtx, operands[3], + gcn_operand_part (V64DImode, operands[4], 1), + vcc, vcc, gcn_vec_constant (V64SImode, 1), + gcn_vec_constant (V64SImode, 0), + gcn_gen_undef (DImode))); + DONE; +}) + +(define_insn_and_split "addv64di3_zext_dup2" + [(set (match_operand:V64DI 0 "register_operand" "=v") + (vec_merge:V64DI + (plus:V64DI + (zero_extend:V64DI + (match_operand:V64SI 1 "gcn_alu_operand" "vA")) + (vec_duplicate:V64DI + (match_operand:DI 2 "gcn_alu_operand" "BSS"))) + (match_operand:V64DI 4 "gcn_register_or_unspec_operand" "0U") + (match_operand:DI 3 "gcn_exec_reg_operand" "e"))) + (clobber (reg:DI VCC_REG))] + "" + "#" + "gcn_can_split_p (V64DImode, operands[0]) + && 
gcn_can_split_p (V64DImode, operands[4])" + [(const_int 0)] +{ + rtx vcc = gen_rtx_REG (DImode, VCC_REG); + emit_insn (gen_addv64si3_vector_vcc_dup (gcn_operand_part (V64DImode, operands[0], 0), + operands[1], + gcn_operand_part (DImode, operands[2], 0), + operands[3], + gcn_operand_part (V64DImode, operands[4], 0), + vcc, + gcn_gen_undef (DImode))); + rtx dsthi = gcn_operand_part (V64DImode, operands[0], 1); + emit_insn (gen_vec_duplicatev64si (dsthi, gcn_operand_part (DImode, operands[2], 1), + operands[3], + gcn_gen_undef (V64SImode))); + emit_insn (gen_addcv64si3_vec (dsthi, dsthi, const0_rtx, operands[3], + gcn_operand_part (V64DImode, operands[4], 1), + vcc, vcc, gcn_vec_constant (V64SImode, 1), + gcn_vec_constant (V64SImode, 0), + gcn_gen_undef (DImode))); + DONE; +}) + +(define_mode_iterator DS_ARITH_MODE [V64SI V64SF V64DI]) + +(define_insn "add<mode>3_ds_<vec_suffix>" + [(set (match_operand:DS_ARITH_MODE 0 "gcn_ds_memory_operand" "=RD") + (vec_merge:DS_ARITH_MODE + (plus:DS_ARITH_MODE + (match_operand:DS_ARITH_MODE 1 "gcn_ds_memory_operand" "%RD") + (match_operand:DS_ARITH_MODE 2 "register_operand" "v")) + (match_operand:DS_ARITH_MODE 4 "gcn_register_ds_or_unspec_operand" "0U") + (match_operand:DI 3 "gcn_exec_reg_operand" "e")))] + "rtx_equal_p (operands[0], operands[1])" + "ds_add%u0\t%A0, %2%O0" + [(set_attr "type" "vop2") + (set_attr "mode" "SI")]) + +(define_insn "sub<mode>3_ds_<vec_suffix>" + [(set (match_operand:DS_ARITH_MODE 0 "gcn_ds_memory_operand" "=RD") + (vec_merge:DS_ARITH_MODE + (minus:DS_ARITH_MODE + (match_operand:DS_ARITH_MODE 1 "gcn_ds_memory_operand" "RD") + (match_operand:DS_ARITH_MODE 2 "register_operand" "v")) + (match_operand:DS_ARITH_MODE 4 "gcn_register_ds_or_unspec_operand" "0U") + (match_operand:DI 3 "gcn_exec_reg_operand" "e")))] + "rtx_equal_p (operands[0], operands[1])" + "ds_sub%u0\t%A0, %2%O0" + [(set_attr "type" "vop2") + (set_attr "mode" "SI")]) + +(define_insn "subr<mode>3_ds_<vec_suffix>" + [(set (match_operand:DS_ARITH_MODE 0 "gcn_ds_memory_operand" "=RD") + (vec_merge:DS_ARITH_MODE + (minus:DS_ARITH_MODE + (match_operand:DS_ARITH_MODE 2 "register_operand" "v") + (match_operand:DS_ARITH_MODE 1 "gcn_ds_memory_operand" "RD")) + (match_operand:DS_ARITH_MODE 4 "gcn_register_ds_or_unspec_operand" "0U") + (match_operand:DI 3 "gcn_exec_reg_operand" "e")))] + "rtx_equal_p (operands[0], operands[1])" + "ds_rsub%u0\t%A0, %2%O0" + [(set_attr "type" "vop2") + (set_attr "mode" "SI")]) + +;; ------------------------------------------------------------------------- +;; ALU: mult +;; ------------------------------------------------------------------------- + +(define_code_iterator any_extend [sign_extend zero_extend]) +(define_code_attr sgnsuffix [(sign_extend "%i") (zero_extend "%u")]) +(define_code_attr s [(sign_extend "s") (zero_extend "u")]) + +(define_insn "<s>mulsi3_highpart_vector" + [(set (match_operand:V64SI 0 "register_operand" "=v") + (vec_merge:V64SI + (truncate:V64SI + (lshiftrt:V64DI + (mult:V64DI + (any_extend:V64DI + (match_operand:V64SI 1 "gcn_alu_operand" "%v")) + (any_extend:V64DI + (match_operand:V64SI 2 "gcn_alu_operand" "vSSB"))) + (const_int 32))) + (match_operand:V64SI 4 "gcn_register_ds_or_unspec_operand" "0U") + (match_operand:DI 3 "gcn_exec_reg_operand" "e")))] + "" + "v_mul_hi<sgnsuffix>0\t%0, %1, %2" + [(set_attr "type" "vop3a") + (set_attr "mode" "SI")]) + +(define_insn "<s>mulsi3_highpart_scalar" + [(set (match_operand:SI 0 "register_operand" "=v") + (truncate:SI + (lshiftrt:DI + (mult:DI + (any_extend:DI + (match_operand:SI 1 
"register_operand" "%v")) + (any_extend:DI + (match_operand:SI 2 "gcn_alu_operand" "vSSB"))) + (const_int 32)))) + (use (match_operand:DI 3 "gcn_exec_reg_operand" "e"))] + "" + "v_mul_hi<sgnsuffix>0\t%0, %1, %2" + [(set_attr "type" "vop3a") + (set_attr "mode" "SI")]) + +;; ------------------------------------------------------------------------- +;; ALU the generic 32bit case +;; ------------------------------------------------------------------------- + +(define_insn "<expander><mode>3_<vec_suffix>" + [(set (match_operand:VEC_1REG_INT_MODE 0 "gcn_valu_dst_operand" "=v,RD") + (vec_merge:VEC_1REG_INT_MODE + (bitop:VEC_1REG_INT_MODE + (match_operand:VEC_1REG_INT_MODE 1 "gcn_valu_src0_operand" "%v,0") + (match_operand:VEC_1REG_INT_MODE 2 "gcn_valu_src1com_operand" "vSSB,v")) + (match_operand:VEC_1REG_INT_MODE 4 "gcn_register_ds_or_unspec_operand" "0U,0U") + (match_operand:DI 3 "gcn_exec_reg_operand" "e,e")))] + "!memory_operand (operands[0], VOIDmode) + || (rtx_equal_p (operands[0], operands[1]) && register_operand (operands[2], VOIDmode))" + "@ + v_<mnemonic>0\t%0, %2, %1 + ds_<mnemonic>0\t%A0, %2%O0" + [(set_attr "type" "vop2,dsmem") + (set_attr "mode" "V64SI")]) + +; We add earlyclobber just because I am lazy to determine order of oeprations +; in output template. VGPR register pairs are not aligned. +(define_insn "<expander>v64di3_<vec_suffix>" + [(set (match_operand:V64DI 0 "gcn_valu_dst_operand" "=&v,RD") + (vec_merge:V64DI + (bitop:V64DI + (match_operand:V64DI 1 "gcn_valu_src0_operand" "%0v,RD") + (match_operand:V64DI 2 "gcn_valu_src1com_operand" "v0SSB,v")) + (match_operand:V64DI 4 "gcn_register_ds_or_unspec_operand" "0U,0U") + (match_operand:DI 3 "gcn_exec_reg_operand" "e,e")))] + "!memory_operand (operands[0], VOIDmode) + || (rtx_equal_p (operands[0], operands[1]) && register_operand (operands[2], VOIDmode))" + "@ + v_<mnemonic>0\t%L0, %L2, %L1\n\tv_<mnemonic>0\t%H0, %H2, %H1 + ds_<mnemonic>0\t%A0, %2%O0" + [(set_attr "type" "vop2,dsmem") + (set_attr "mode" "V64SI,V64DI")]) + +(define_insn "<expander><mode>3_<vec_suffix>" + [(set (match_operand:V_INT_MODE 0 "register_operand" "=v") + (vec_merge:V_INT_MODE + (shiftop:V_INT_MODE + (match_operand:V_INT_MODE 1 "gcn_alu_operand" "v") + (match_operand:V64SI 2 "gcn_alu_operand" "vSSB")) + (match_operand:V_INT_MODE 4 "gcn_register_or_unspec_operand" "0U") + (match_operand:DI 3 "gcn_exec_reg_operand" "e")))] + "" + "v_<revmnemonic>0\t%0, %2, %1" + [(set_attr "type" "vop2") + (set_attr "mode" "<MODE>")]) + +;; ------------------------------------------------------------------------- +;; Generic FP binary operations +;; ------------------------------------------------------------------------- + +(define_mode_iterator VEC_FP_MODE + [V64HF V64SF V64DF]) +(define_mode_iterator FP_MODE + [HF SF DF]) + +(define_code_iterator comm_fp [plus mult smin smax]) +(define_code_iterator nocomm_fp [minus]) +(define_code_iterator all_fp [plus mult minus smin smax]) + +(define_insn "<expander><mode>_<vec_suffix>" + [(set (match_operand:VEC_FP_MODE 0 "register_operand" "=v") + (vec_merge:VEC_FP_MODE + (comm_fp:VEC_FP_MODE + (match_operand:VEC_FP_MODE 1 "register_operand" "%v") + (match_operand:VEC_FP_MODE 2 "gcn_alu_operand" " vSSB")) + (match_operand:VEC_FP_MODE 4 "gcn_register_or_unspec_operand" "0U") + (match_operand:DI 3 "gcn_exec_reg_operand" "e")))] + "" + "v_<mnemonic>0\t%0, %2, %1" + [(set_attr "type" "vop2") + (set_attr "mode" "<MODE>")]) + + +(define_insn "<expander><mode>_<vec_suffix>" + [(set (match_operand:VEC_FP_MODE 0 "register_operand" "=v,v") + 
(vec_merge:VEC_FP_MODE + (nocomm_fp:VEC_FP_MODE + (match_operand:VEC_FP_MODE 1 "register_operand" "vSSB,v") + (match_operand:VEC_FP_MODE 2 "gcn_alu_operand" "v,vSSB")) + (match_operand:VEC_FP_MODE 4 "gcn_register_or_unspec_operand" "0U,0U") + (match_operand:DI 3 "gcn_exec_reg_operand" "e,e")))] + "" + "@ + v_<mnemonic>0\t%0, %2, %1 + v_<revmnemonic>0\t%0, %2, %1" + [(set_attr "type" "vop2") + (set_attr "mode" "<MODE>")]) + +(define_expand "<expander><mode>3" + [(set (match_operand:VEC_FP_MODE 0 "gcn_valu_dst_operand" "") + (vec_merge:VEC_FP_MODE + (all_fp:VEC_FP_MODE + (match_operand:VEC_FP_MODE 1 "gcn_valu_src0_operand" "") + (match_operand:VEC_FP_MODE 2 "gcn_valu_src1_operand" "")) + (match_dup 4) + (match_dup 3)))] + "" +{ + operands[3] = gcn_full_exec_reg (); + operands[4] = gcn_gen_undef (<MODE>mode); +}) + +(define_expand "<expander><mode>3" + [(parallel [(set (match_operand:FP_MODE 0 "gcn_valu_dst_operand" "") + (all_fp:FP_MODE + (match_operand:FP_MODE 1 "gcn_valu_src0_operand" "") + (match_operand:FP_MODE 2 "gcn_valu_src1_operand" ""))) + (use (match_dup 3))])] + "" +{ + operands[3] = gcn_scalar_exec (); +}) diff --git a/gcc/config/gcn/gcn.c b/gcc/config/gcn/gcn.c new file mode 100644 index 00000000000..c80bdf5ccc9 --- /dev/null +++ b/gcc/config/gcn/gcn.c @@ -0,0 +1,2905 @@ +/* Copyright (C) 2016-2017 Free Software Foundation, Inc. + + This file is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your option) + any later version. + + This file is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "backend.h" +#include "target.h" +#include "memmodel.h" +#include "rtl.h" +#include "tree.h" +#include "gimple.h" +#include "cfghooks.h" +#include "cfgloop.h" +#include "df.h" +#include "tm_p.h" +#include "stringpool.h" +#include "expmed.h" +#include "optabs.h" +#include "regs.h" +#include "emit-rtl.h" +#include "recog.h" +#include "diagnostic-core.h" +#include "insn-attr.h" +#include "alias.h" +#include "fold-const.h" +#include "stor-layout.h" +#include "calls.h" +#include "varasm.h" +#include "explow.h" +#include "expr.h" +#include "output.h" +#include "cfgrtl.h" +#include "cfgbuild.h" +#include "langhooks.h" +#include "reload.h" +#include "sched-int.h" +#include "params.h" +#include "gimplify.h" +#include "tm-constrs.h" +#include "ddg.h" +#include "dumpfile.h" +#include "builtins.h" +#include "rtl-iter.h" +#include "cfghooks.h" + +/* This file should be included last. */ +#include "target-def.h" + +static REAL_VALUE_TYPE dconst4, dconst1over2pi; +static bool ext_gcn_constants_init = 0; + + +/* Register modes. */ + +/* Return true if MODE is valid for pair of VGPR registers. */ + +static inline bool +vgpr_2reg_mode_p (enum machine_mode mode) +{ + return mode == DImode || mode == DFmode || mode == V64DFmode + || mode == V64DImode /*|| mode == V64BImode*/; +} + +/* Return true if MODE can be handled directly by VGPR operations. 
*/ + +static inline bool +vgpr_vector_mode_p (enum machine_mode mode) +{ + return mode == V64HImode || mode == V64SImode || mode == V64DImode + || mode == V64HFmode || mode == V64SFmode || mode == V64DFmode; +} + + +/* Return true if MODE is valid for pair of SGPR registers. */ + +static inline bool +sgpr_2reg_mode_p (enum machine_mode mode) +{ + return mode == DImode || mode == DFmode /*|| mode == V32BImode*/; +} + +/* Return number of hard register needed to hold value of MODE in REGNO. */ + +int +gcn_hard_regno_nregs (int regno, enum machine_mode mode) +{ + /* Scalar registers are 32bit, vector registers are in fact tuples of + 64 lanes. */ + if (VGPR_REGNO_P (regno)) + { + if (vgpr_1reg_mode_p (mode)) + return 1; + if (vgpr_2reg_mode_p (mode)) + return 2; + } + return CEIL (GET_MODE_SIZE (mode), 4); +} + +/* Register classes. */ + +/* Implement the TARGET_CLASS_MAX_NREGS hook. + + On the 80386, this is the size of MODE in words, + except in the FP regs, where a single reg is always enough. */ + +static unsigned char +gcn_class_max_nregs (reg_class_t rclass, machine_mode mode) +{ + if (rclass == VGPR_REGS) + { + if (vgpr_1reg_mode_p (mode)) + return 1; + if (vgpr_2reg_mode_p (mode)) + return 2; + } + return CEIL (GET_MODE_SIZE (mode), 4); +} + +/* Return true if REGNO can hold value in MODE. */ + +bool +gcn_hard_regno_mode_ok (int regno, machine_mode mode) +{ + switch (regno) + { + case FLAT_SCRATCH_LO_REG: + case XNACK_MASK_LO_REG: + case TBA_LO_REG: + case TMA_LO_REG: + return (mode == SImode || mode == DImode); + case VCC_LO_REG: + case EXEC_LO_REG: + return (mode == CCmode || mode == CCmode + || mode == SImode || mode == DImode + /*|| mode == V32BImode || mode == V64BImode*/); + case M0_REG: + case FLAT_SCRATCH_HI_REG: + case XNACK_MASK_HI_REG: + case TBA_HI_REG: + case TMA_HI_REG: + return mode == SImode; + case VCC_HI_REG: + case EXEC_HI_REG: + return mode == SImode /*|| mode == V32BImode*/; + case SCC_REG: + case VCCZ_REG: + case EXECZ_REG: + return mode == BImode; + } + /* FIXME: We restrict double register values to aligned registers. */ + if (SGPR_REGNO_P (regno)) + return (sgpr_1reg_mode_p (mode) + || (!((regno - FIRST_VGPR_REG)&1) && sgpr_2reg_mode_p (mode))); + if (VGPR_REGNO_P (regno)) + return (vgpr_1reg_mode_p (mode) || vgpr_2reg_mode_p (mode)); + return false; +} + +/* Return smallest class containing REGNO. */ + +enum reg_class +gcn_regno_reg_class (int regno) +{ + switch (regno) + { + case SCC_REG: + return SCC_CONDITIONAL_REG; + case VCCZ_REG: + return VCCZ_CONDITIONAL_REG; + case EXECZ_REG: + return EXECZ_CONDITIONAL_REG; + case EXEC_LO_REG: + case EXEC_HI_REG: + return EXEC_MASK_REG; + } + if (VGPR_REGNO_P (regno)) + return VGPR_REGS; + if (SGPR_REGNO_P (regno)) + return SGPR_REGS; + if (regno < FIRST_VGPR_REG) + return GENERAL_REGS; + return ALL_REGS; +} + +/* GCC assumes that lowpart contains first part of value as stored in memory. + This is not the case for vector registers. 
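
For readers unfamiliar with the register file: the sizing logic above boils down to "a VGPR is a 64-lane x 32-bit tuple, an SGPR is a single 32-bit word". A standalone sketch of that arithmetic, using illustrative byte sizes rather than real GCC machine modes:

#include <stdbool.h>
#include <stdio.h>

/* Simplified restatement of the nregs logic above: one VGPR per
   64-lane x 32-bit value, a VGPR pair for wider vector elements,
   and ceil(size/4) SGPRs for scalars.  */
static int
nregs (bool vgpr, bool one_reg_mode, bool two_reg_mode, int size_bytes)
{
  if (vgpr && one_reg_mode)
    return 1;
  if (vgpr && two_reg_mode)
    return 2;
  return (size_bytes + 3) / 4;
}

int
main (void)
{
  printf ("DImode in SGPRs:    %d\n", nregs (false, false, false, 8)); /* 2 */
  printf ("V64SImode in VGPRs: %d\n", nregs (true, true, false, 256)); /* 1 */
  printf ("V64DImode in VGPRs: %d\n", nregs (true, false, true, 512)); /* 2 */
  return 0;
}
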
*/ + +bool gcn_cannot_change_mode_class (machine_mode from, machine_mode to, + int regclass) +{ + if (!vgpr_vector_mode_p (from) && !vgpr_vector_mode_p (to)) + return false; + return gcn_class_max_nregs (regclass, from) != gcn_class_max_nregs (regclass, to); +} + +static void +print_reg (FILE *file, rtx x) +{ + enum machine_mode mode = GET_MODE (x); + if (mode == SImode || mode == BImode || mode == V64SImode + || mode == HFmode || mode == SFmode || mode == V64SFmode + || mode == V64SImode) + fprintf (file, "%s", reg_names[REGNO (x)]); + else if (mode == DImode || mode == V64DImode + || mode == DFmode || mode == V64DFmode) + { + if (SGPR_REGNO_P (REGNO (x))) + fprintf (file, "s[%i:%i]", REGNO (x) - FIRST_SGPR_REG, + REGNO (x) - FIRST_SGPR_REG + 1); + else if (VGPR_REGNO_P (REGNO (x))) + fprintf (file, "v[%i:%i]", REGNO (x) - FIRST_VGPR_REG, + REGNO (x) - FIRST_VGPR_REG + 1); + else if (REGNO (x) == FLAT_SCRATCH_REG) + fprintf (file, "flat_scratch"); + else if (REGNO (x) == EXEC_REG) + fprintf (file, "exec"); + else if (REGNO (x) == VCC_LO_REG) + fprintf (file, "vcc"); + else + fprintf (file, "[%s:%s]", + reg_names [REGNO (x)], reg_names [REGNO (x)+1]); + } + else if (mode == TImode) + { + if (SGPR_REGNO_P (REGNO (x))) + fprintf (file, "s[%i:%i]", REGNO (x) - FIRST_SGPR_REG, + REGNO (x) - FIRST_SGPR_REG + 3); + else if (VGPR_REGNO_P (REGNO (x))) + fprintf (file, "v[%i:%i]", REGNO (x) - FIRST_VGPR_REG, + REGNO (x) - FIRST_VGPR_REG + 3); + else + gcc_unreachable (); + } + else + gcc_unreachable (); +} + + +/* Initialize the table of extra 80387 mathematical constants. */ + +static void +init_ext_gcn_constants (void) +{ + real_from_integer (&dconst4, DFmode, 4, SIGNED); + + /* FIXME: this constant probably does not match what hardware really loads. + Reality check it eventually. */ + real_from_string (&dconst1over2pi, "0.1591549430918953357663423455968866839"); + real_convert (&dconst1over2pi, SFmode, &dconst1over2pi); + + ext_gcn_constants_init = 1; +} + +/* Return non-zero if X is a constant that can appear as inline operation. + This is 0, 0.5, -0.5, 1, -1, 2, -2, 4,-4, 1/(2*pi) + Or a vector of those. + The value returned should be the encoding of this constant. */ + +int +gcn_inline_fp_constant_p (rtx x, bool allow_vector) +{ + machine_mode mode = GET_MODE (x); + + if ((mode == V64HFmode || mode == V64SFmode || mode == V64DFmode) + && allow_vector) + { + int n; + if (GET_CODE (x) != CONST_VECTOR) + return 0; + n = gcn_inline_fp_constant_p (CONST_VECTOR_ELT (x, 0), false); + if (!n) + return 0; + for (int i = 1; i < 64; i++) + if (CONST_VECTOR_ELT (x, i) != CONST_VECTOR_ELT (x, 0)) + return 0; + return 1; + } + + if (mode != HFmode && mode != SFmode && mode != DFmode) + return 0; + + const REAL_VALUE_TYPE *r; + + if (x == CONST0_RTX (mode)) + return 128; + if (x == CONST1_RTX (mode)) + return 242; + + r = CONST_DOUBLE_REAL_VALUE (x); + + if (real_identical (r, &dconstm1)) + return 243; + + if (real_identical (r, &dconsthalf)) + return 240; + if (real_identical (r, &dconstm1)) + return 243; + if (real_identical (r, &dconst2)) + return 244; + if (real_identical (r, &dconst4)) + return 246; + if (real_identical (r, &dconst1over2pi)) + return 248; + if (!ext_gcn_constants_init) + init_ext_gcn_constants (); + real_value_negate (r); + if (real_identical (r, &dconsthalf)) + return 241; + if (real_identical (r, &dconst2)) + return 245; + if (real_identical (r, &dconst4)) + return 247; + + /* FIXME: add 4, -4 and 1/(2*PI). 
*/ + + return 0; +} + +/* Return non-zero if X is a constant that can appear as inline operation. + This is 0, 0.5, -0.5, 1, -1, 2, -2, 4,-4, 1/(2*pi) + Or a vector of those. + The value returned should be the encoding of this constant. */ + +bool +gcn_fp_constant_p (rtx x, bool allow_vector) +{ + machine_mode mode = GET_MODE (x); + + if ((mode == V64HFmode || mode == V64SFmode || mode == V64DFmode) + && allow_vector) + { + int n; + if (GET_CODE (x) != CONST_VECTOR) + return false; + n = gcn_fp_constant_p (CONST_VECTOR_ELT (x, 0), false); + if (!n) + return false; + for (int i = 1; i < 64; i++) + if (CONST_VECTOR_ELT (x, i) != CONST_VECTOR_ELT (x, 0)) + return false; + return true; + } + if (mode != HFmode && mode != SFmode && mode != DFmode) + return false; + + if (gcn_inline_fp_constant_p (x, false)) + return true; + /* FIXME: It is not clear how 32bit immediates are interpreted here. */ + return (mode != DFmode); +} + +/* Return true if X is constant representable as inline constant. */ + +bool +gcn_inline_constant_p (rtx x) +{ + if (GET_CODE (x) == CONST_INT) + return INTVAL (x) >= -16 && INTVAL (x) < 64; + if (GET_CODE (x) == CONST_DOUBLE) + return gcn_inline_fp_constant_p (x, false); + if (GET_CODE (x) == CONST_VECTOR) + { + int n; + if (!vgpr_vector_mode_p (GET_MODE (x))) + return false; + if (GET_CODE (x) != CONST_VECTOR) + return false; + n = gcn_inline_constant_p (CONST_VECTOR_ELT (x, 0)); + if (!n) + return false; + for (int i = 1; i < 64; i++) + if (CONST_VECTOR_ELT (x, i) != CONST_VECTOR_ELT (x, 0)) + return false; + return 1; + } + return false; +} + +/* Return true if X is constant representable as inline constant. */ + +bool +gcn_constant_p (rtx x) +{ + if (GET_CODE (x) == CONST_INT) + return trunc_int_for_mode (INTVAL (x), SImode) == INTVAL (x); + if (GET_CODE (x) == CONST_DOUBLE) + return gcn_fp_constant_p (x, false); + if (GET_CODE (x) == CONST_VECTOR) + { + int n; + if (!vgpr_vector_mode_p (GET_MODE (x))) + return false; + if (GET_CODE (x) != CONST_VECTOR) + return false; + n = gcn_constant_p (CONST_VECTOR_ELT (x, 0)); + if (!n) + return false; + for (int i = 1; i < 64; i++) + if (CONST_VECTOR_ELT (x, i) != CONST_VECTOR_ELT (x, 0)) + return false; + return 1; + } + return false; +} + +/* GCN defines multiple ways to address memory. */ + +enum gcn_address_type +{ + GCN_ADDR_LOAD, + GCN_ADDR_STORE, + GCN_ADDR_FLAT, + GCN_ADDR_DS, + GCN_ADDR_BUFFERED, +}; + +/* Determine instruction used to access memory in ADDRSPACE. 
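
The integer side of the inline-operand checks above accepts only the range -16..63; anything outside it has to be materialized as a separate 32-bit literal. A minimal standalone restatement, assuming nothing beyond the test shown above:

#include <stdbool.h>
#include <stdint.h>

/* True if VAL fits a GCN inline integer operand, mirroring the
   INTVAL (x) >= -16 && INTVAL (x) < 64 test above.  So 63 is an
   inline operand, 64 is not.  */
static bool
inline_int_operand_p (int64_t val)
{
  return val >= -16 && val < 64;
}
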
*/ + +enum gcn_address_type +gcn_addr_space_type (addr_space_t addrspace) +{ + switch (addrspace) + { + case ADDR_SPACE_SCRATCH: + return GCN_ADDR_BUFFERED; + case ADDR_SPACE_FLAT: + return GCN_ADDR_FLAT; + case ADDR_SPACE_SCALAR_FLAT: + return GCN_ADDR_LOAD; + case ADDR_SPACE_LDS: + case ADDR_SPACE_GDS: + return GCN_ADDR_DS; + default: + gcc_unreachable (); + } +} + +void +print_operand_address_1 (FILE * file, register rtx addr, addr_space_t as) +{ + rtx reg; + rtx offset; + gcn_address_type type = gcn_addr_space_type (as); + + if (type == GCN_ADDR_BUFFERED) + switch (GET_CODE (addr)) + { + case REG: + print_reg (file, addr); + break; + + case PLUS: + reg = XEXP (addr, 0); + offset = XEXP (addr, 1); + print_reg (file, reg); + if (GET_CODE (offset) == CONST_INT) + fprintf (file, " offset:" HOST_WIDE_INT_PRINT_DEC, + INTVAL (offset)); + else + abort (); + break; + + default: + debug_rtx (addr); + abort (); + } + else if (type == GCN_ADDR_FLAT) + { + gcc_assert (GET_CODE (addr) == REG); + print_reg (file, addr); + } + else if (type == GCN_ADDR_DS) + switch (GET_CODE (addr)) + { + case REG: + print_reg (file, addr); + break; + + case PLUS: + reg = XEXP (addr, 0); + print_reg (file, reg); + break; + + default: + debug_rtx (addr); + abort (); + } + else + switch (GET_CODE (addr)) + { + case REG: + print_reg (file, addr); + fprintf (file, ", 0"); + break; + + case PLUS: + reg = XEXP (addr, 0); + offset = XEXP (addr, 1); + print_reg (file, reg); + fprintf (file, ", "); + if (GET_CODE (offset) == REG) + print_reg (file, reg); + else if (GET_CODE (offset) == CONST_INT) + fprintf (file, HOST_WIDE_INT_PRINT_DEC, + INTVAL (offset)); + else + abort (); + break; + + default: + debug_rtx (addr); + abort (); + } +} + +void +print_operand_address (FILE * file, register rtx addr) +{ + print_operand_address_1 (file, addr, ADDR_SPACE_FLAT); +} + +/* ASM printing. + b - print operand size as untyped operand (b16/b32/b64) + i - print operand size as untyped operand (i16/b32/i64) + u - print operand size as untyped operand (u16/u32/u64) + s - print operand size as memory access size + (byte/short/dword/dwordx2/wordx3/dwordx4) + C - print conditional code for s_cbranch (_sccz/_sccnz/_vccz/_vccnz...) + D - print conditional code for s_cmp (eq_u64/lg_u64...) + E - print conditional code for v_cmp (eq_u64/ne_u64...) + A - print address in formating suitable for given address space. + O - print offset:n for data share operations. + */ + +void +print_operand (FILE * file, rtx x, int code) +{ + int xcode = GET_CODE (x); + switch (code) + { + /* Instructions has following suffixes. + If there are two suffixes, first is destination type, second is + source type. + + B32 Bitfield (untyped data) 32-bit + B64 Bitfield (untyped data) 64-bit + F16 floating-point 16-bit + F32 floating-point 32-bit (IEEE 754 single-precision float) + F64 floating-point 64-bit (IEEE 754 double-precision float) + I16 signed 32-bit integer + I32 signed 32-bit integer + I64 signed 64-bit integer + U16 unsigned 32-bit integer + U32 unsigned 32-bit integer + U64 unsigned 64-bit integer */ + + /* Print oprande size as untyped suffix. 
*/ + case 'b': + { + const char *s=""; + enum machine_mode mode = GET_MODE (x); + if (VECTOR_MODE_P (mode)) + mode = GET_MODE_INNER (mode); + switch (GET_MODE_SIZE (mode)) + { + case 2: + s="_b16"; + break; + case 4: + s="_b32"; + break; + case 8: + s="_b64"; + break; + default: + output_operand_lossage ("invalid operand %%xn code"); + return; + } + fputs (s, file); + } + return; + case 'i': + { + const char *s=""; + enum machine_mode mode = GET_MODE (x); + if (VECTOR_MODE_P (mode)) + mode = GET_MODE_INNER (mode); + if (FLOAT_MODE_P (mode)) + switch (GET_MODE_SIZE (mode)) + { + case 2: + s="_f16"; + break; + case 4: + s="_f32"; + break; + case 8: + s="_f64"; + break; + default: + output_operand_lossage ("invalid operand %%xn code"); + return; + } + else + switch (GET_MODE_SIZE (mode)) + { + case 2: + s="_i16"; + break; + case 4: + s="_i32"; + break; + case 8: + s="_i64"; + break; + default: + output_operand_lossage ("invalid operand %%xn code"); + return; + } + fputs (s, file); + } + return; + case 'u': + { + const char *s=""; + enum machine_mode mode = GET_MODE (x); + if (VECTOR_MODE_P (mode)) + mode = GET_MODE_INNER (mode); + switch (GET_MODE_SIZE (mode)) + { + case 2: + s="_u16"; + break; + case 4: + s="_u32"; + break; + case 8: + s="_u64"; + break; + default: + output_operand_lossage ("invalid operand %%xn code"); + return; + } + fputs (s, file); + } + return; + /* Print oprande size as untyped suffix. */ + case 's': + { + const char *s=""; + switch (GET_MODE_SIZE (GET_MODE (x))) + { + case 1: + s="_byte"; + break; + case 2: + s="_short"; + break; + case 4: + s="_dword"; + break; + case 8: + s="_dwordx2"; + break; + case 12: + s="_dwordx3"; + break; + case 16: + s="_dwordx4"; + break; + case 32: + s="_dwordx8"; + break; + case 64: + s="_dwordx16"; + break; + case 256: + s="_dword"; + break; + default: + output_operand_lossage ("invalid operand %%xn code"); + return; + } + fputs (s, file); + } + return; + case 'A': + if (xcode != MEM) + { + output_operand_lossage ("invalid %%xn code"); + return; + } + print_operand_address_1 (file, XEXP (x, 0), MEM_ADDR_SPACE (x)); + return; + case 'O': + { + if (xcode != MEM) + { + output_operand_lossage ("invalid %%xn code"); + return; + } + if ((unsigned) gcn_addr_space_type (MEM_ADDR_SPACE (x)) + == ADDR_SPACE_GDS) + fprintf (file, " gds"); + if (GET_CODE (XEXP (x, 0)) == REG) + return; + if (GET_CODE (XEXP (x, 0)) != PLUS) + { + output_operand_lossage ("invalid %%xn code"); + return; + } + rtx val = XEXP (x, (XEXP (x, 0), 1)); + if (GET_CODE (val) == CONST_VECTOR) + val = CONST_VECTOR_ELT (val, 0); + if (GET_CODE (val) != CONST_INT) + { + output_operand_lossage ("invalid %%xn code"); + return; + } + fprintf (file, " offset:" HOST_WIDE_INT_PRINT_DEC, + INTVAL (val)); + + } + return; + case 'C': + { + const char *s; + bool num = false; + if ((xcode != EQ && xcode != NE) + || !REG_P (XEXP (x, 0))) + { + output_operand_lossage ("invalid %%xn code"); + return; + } + switch (REGNO (XEXP (x, 0))) + { + case VCCZ_REG: + s="_vcc"; + break; + case SCC_REG: + /* For some reason llvm as insist on scc0 instead of sccz. */ + num = true; + s="_scc"; + break; + case EXECZ_REG: + s="_exec"; + break; + default: + output_operand_lossage ("invalid %%xn code"); + return; + } + fputs (s, file); + if (xcode == EQ) + fputc (num ? '0' : 'z', file); + else + fputs (num ? 
"1" : "nz", file); + return; + } + case 'D': + { + const char *s; + bool cmp_signed = false; + switch (xcode) + { + case EQ: + s="_eq_"; + break; + case NE: + s="_lg_"; + break; + case LT: + s="_lt_"; + cmp_signed = true; + break; + case LE: + s="_le_"; + cmp_signed = true; + break; + case GT: + s="_gt_"; + cmp_signed = true; + break; + case GE: + s="_ge_"; + cmp_signed = true; + break; + case LTU: + s="_lt_"; + break; + case LEU: + s="_le_"; + break; + case GTU: + s="_gt_"; + break; + case GEU: + s="_ge_"; + break; + default: + output_operand_lossage ("invalid %%xn code"); + return; + } + fputs (s, file); + fputc (cmp_signed ? 'i' : 'u', file); + + enum machine_mode mode = GET_MODE (XEXP (x, 0)); + if (mode == VOIDmode) + mode = GET_MODE (XEXP (x, 1)); + switch (GET_MODE_SIZE (mode)) + { + case 4: + s="32"; + break; + case 8: + s="64"; + break; + default: + output_operand_lossage ("invalid operand %%xn code"); + return; + } + fputs (s, file); + return; + } + case 'E': + { + const char *s; + bool cmp_signed = false; + switch (xcode) + { + case EQ: + s="_eq_"; + break; + case NE: + s="_ne_"; + break; + case LT: + s="_lt_"; + cmp_signed = true; + break; + case LE: + s="_le_"; + cmp_signed = true; + break; + case GT: + s="_gt_"; + cmp_signed = true; + break; + case GE: + s="_ge_"; + cmp_signed = true; + break; + case LTU: + s="_lt_"; + break; + case LEU: + s="_le_"; + break; + case GTU: + s="_gt_"; + break; + case GEU: + s="_ge_"; + break; + default: + output_operand_lossage ("invalid %%xn code"); + return; + } + fputs (s, file); + fputc (cmp_signed ? 'i' : 'u', file); + + enum machine_mode mode = GET_MODE (XEXP (x, 0)); + if (mode == VOIDmode) + mode = GET_MODE (XEXP (x, 1)); + switch (GET_MODE_SIZE (mode)) + { + case 4: + s="32"; + break; + case 8: + s="64"; + break; + default: + output_operand_lossage ("invalid operand %%xn code"); + return; + } + fputs (s, file); + return; + } + case 'L': + print_operand (file, gcn_operand_part (GET_MODE (x), x, 0), 0); + return; + case 'H': + print_operand (file, gcn_operand_part (GET_MODE (x), x, 1), 0); + return; + case 0: + if (xcode == REG) + print_reg (file, x); + else if (xcode == MEM) + output_address (GET_MODE (x), XEXP (x, 0)); + else if (xcode == CONST_INT) + fprintf (file, "%i", (int)INTVAL (x)); + else if (xcode == CONST_VECTOR) + print_operand (file, CONST_VECTOR_ELT (x, 0), code); + else if (xcode == CONST_DOUBLE) + { + const char *str; + switch (gcn_inline_fp_constant_p (x, false)) + { + case 240: + str = "0.5"; + break; + case 241: + str = "-0.5"; + break; + case 242: + str = "1.0"; + break; + case 243: + str = "-1.0"; + break; + case 244: + str = "2.0"; + break; + case 245: + str = "-2.0"; + break; + case 246: + str = "4.0"; + break; + case 247: + str = "-4.0"; + break; + case 248: + str = "1/pi"; + break; + default: + rtx ix = simplify_gen_subreg (GET_MODE (x) == DFmode + ? DImode : SImode, + x, GET_MODE (x), 0); + if (x) + print_operand (file, ix, code); + else + output_operand_lossage ("invlaid fp constant"); + return; + break; + } + fprintf (file, str); + return; + } + else + output_addr_const (file, x); + return; + default: + output_operand_lossage ("invalid %%xn code"); + } + gcc_unreachable (); +} + + +/* Addressing. */ + +/* Return true if X is CONST_VECTOR of single constant. 
*/ + +static bool +single_cst_vector_p (rtx x) +{ + if (GET_CODE (x) != CONST_VECTOR) + return false; + for (int i = 1; i < 64; i++) + if (CONST_VECTOR_ELT (x, i) != CONST_VECTOR_ELT (x, 0)) + return false; + return true; +} + +/* Recognizes RTL expressions that are valid memory addresses for an + instruction. The MODE argument is the machine mode for the MEM + expression that wants to use this address. + + It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should + convert common non-canonical forms to canonical form so that they will + be recognized. */ + +static bool +gcn_legitimate_address_p (machine_mode mode, rtx x, bool, + enum gcn_address_type type) +{ + enum machine_mode addr_mode; + switch (type) + { + case GCN_ADDR_LOAD: + case GCN_ADDR_STORE: + addr_mode = DImode; + break; + case GCN_ADDR_BUFFERED: + addr_mode = SImode; + break; + case GCN_ADDR_FLAT: + if (vgpr_vector_mode_p (mode)) + addr_mode = V64DImode; + else + addr_mode = DImode; + break; + case GCN_ADDR_DS: + if (vgpr_vector_mode_p (mode)) + addr_mode = V64SImode; + else + addr_mode = SImode; + break; + } + if (GET_MODE (x) != addr_mode) + return false; + if (type == GCN_ADDR_LOAD || type == GCN_ADDR_STORE) + switch (GET_CODE (x)) + { + case REG: + return (REGNO (x) >= FIRST_PSEUDO_REGISTER + || gcn_sgpr_register_operand (x, DImode)); + /* Addresses are in the form BASE+OFFSET + OFFSET is either 20bit unsigned immediate, SGPR or M0. + Writes and atomics do not accept SGPR. */ + case PLUS: + { + rtx x0 = XEXP (x, 0); + rtx x1 = XEXP (x, 1); + if (GET_CODE (x0) != REG + || (REGNO (x0) <= FIRST_PSEUDO_REGISTER + && !gcn_sgpr_register_operand (x0, DImode))) + return false; + if (GET_CODE (x1) == REG) + { + if (GET_CODE (x1) != REG + || (REGNO (x1) == M0_REG + /* FIXME: maybe this is about parameter of a store. */ + && (type == GCN_ADDR_STORE + || (REGNO (x1) <= FIRST_PSEUDO_REGISTER + && !gcn_sgpr_register_operand (x1, DImode))))) + return false; + } + else if (GET_CODE (x1) == CONST_INT) + { + if (INTVAL (x1) >= 0 && INTVAL (x1) < (1<<20)) + return true; + } + return false; + } + + default: + break; + } + else if (type == GCN_ADDR_BUFFERED) + { + return (GET_CODE (x) == REG + && (REGNO (x) >= FIRST_PSEUDO_REGISTER + || gcn_sgpr_register_operand (x, SImode))); + } + else if (type == GCN_ADDR_FLAT) + { + return (GET_CODE (x) == REG + && (REGNO (x) >= FIRST_PSEUDO_REGISTER + || gcn_vgpr_register_operand (x, DImode))); + } + else if (type == GCN_ADDR_DS) + switch (GET_CODE (x)) + { + case REG: + return (REGNO (x) >= FIRST_PSEUDO_REGISTER + || gcn_vgpr_register_operand (x, DImode)); + /* Addresses are in the form BASE+OFFSET + OFFSET is either 20bit unsigned immediate, SGPR or M0. + Writes and atomics do not accept SGPR. */ + case PLUS: + { + rtx x0 = XEXP (x, 0); + rtx x1 = XEXP (x, 1); + if (GET_CODE (x0) != REG + || (REGNO (x0) <= FIRST_PSEUDO_REGISTER + && !gcn_vgpr_register_operand (x0, DImode))) + return false; + if (GET_CODE (x1) == REG) + { + if (GET_CODE (x1) != REG + || (REGNO (x1) == M0_REG + /* FIXME: maybe this is about parameter of a store. 
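
The scalar-memory address forms accepted above are a bare base register, base plus register offset (loads only), or base plus an unsigned immediate below 2^20. A tiny standalone check for the immediate case, with the register-operand tests elided:

#include <stdbool.h>
#include <stdint.h>

/* Hedged sketch of the offset test above: scalar loads/stores take
   BASE plus a 20-bit unsigned byte offset.  */
static bool
smem_offset_ok_p (int64_t offset)
{
  return offset >= 0 && offset < (1 << 20);
}
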
*/ + && (type == GCN_ADDR_STORE + || (REGNO (x1) <= FIRST_PSEUDO_REGISTER + && !gcn_sgpr_register_operand (x1, DImode))))) + return false; + } + else if (GET_CODE (x1) == CONST_VECTOR + && GET_CODE (CONST_VECTOR_ELT (x1, 0)) == CONST_INT + && single_cst_vector_p (x1)) + { + x1 = CONST_VECTOR_ELT (x1, 0); + if (INTVAL (x1) >= 0 && INTVAL (x1) < (1<<20)) + return true; + } + return false; + } + + default: + break; + } + else + gcc_unreachable (); + return false; +} + +/* Recognizes RTL expressions that are valid memory addresses for an + instruction. The MODE argument is the machine mode for the MEM + expression that wants to use this address. + + It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should + convert common non-canonical forms to canonical form so that they will + be recognized. */ + +static bool +gcn_addr_space_legitimate_address_p (machine_mode mode, rtx x, bool strict, + addr_space_t as) +{ + return gcn_legitimate_address_p (mode, x, strict, gcn_addr_space_type (as)); +} + + +/* Return the appropriate mode for a named address pointer. */ +static machine_mode +gcn_addr_space_pointer_mode (addr_space_t addrspace) +{ + switch (addrspace) + { + case ADDR_SPACE_SCRATCH: + case ADDR_SPACE_LDS: + case ADDR_SPACE_GDS: + return SImode; + case ADDR_SPACE_FLAT: + case ADDR_SPACE_SCALAR_FLAT: + return DImode; + default: + gcc_unreachable (); + } +} + +/* Return the appropriate mode for a named address address. */ +static machine_mode +gcn_addr_space_address_mode (addr_space_t addrspace) +{ + return gcn_addr_space_pointer_mode (addrspace); +} + +/* Determine if one named address space is a subset of another. */ + +static bool +gcn_addr_space_subset_p (addr_space_t subset, addr_space_t superset) +{ + if (subset == superset) + return true; + if (superset == ADDR_SPACE_FLAT || superset == ADDR_SPACE_SCALAR_FLAT) + return true; + return false; +} + +/* Convert from one address space to another. */ +static rtx +gcn_addr_space_convert (rtx /*op*/, tree /*from_type*/, tree /*to_type*/) +{ + gcc_unreachable (); +} + +/* Retun true if REGNO is OK for memory adressing. */ + +bool +gcn_regno_mode_code_ok_for_base_p (int regno, + machine_mode, addr_space_t, int, int) +{ + if (regno >= FIRST_PSEUDO_REGISTER) + { + if (reg_renumber[regno]) + regno = reg_renumber[regno]; + else + return true; + } + return SGPR_REGNO_P (regno); +} + +/* Retun true if REGNO is OK for memory adressing. */ + +reg_class +gcn_mode_code_base_reg_class (machine_mode /*mode*/, addr_space_t as, int, int) +{ + switch (gcn_addr_space_type (as)) + { + case GCN_ADDR_LOAD: + case GCN_ADDR_STORE: + case GCN_ADDR_BUFFERED: + return GENERAL_REGS; + break; + case GCN_ADDR_FLAT: + case GCN_ADDR_DS: + return VGPR_REGS; + } + gcc_unreachable (); +} + +/* Return true if REGNO is OK for index of memory addressing. */ + +bool +regno_ok_for_index_p (int regno) +{ + if (regno >= FIRST_PSEUDO_REGISTER) + { + if (reg_renumber[regno]) + regno = reg_renumber[regno]; + else + return true; + } + return regno == M0_REG || VGPR_REGNO_P (regno); +} + +/* "Function" calling conventions. 
*/ + +struct gcn_kernel_arg_type +{ + const char *name; + const char *header_pseudo; + int mode; + int fixed_regno; +}; + +#define PRIVATE_SEGMENT_BUFFER_ARG 1 +#define KERNARG_SEGMENT_PTR 4 +#define FLAT_SCRATCH_INIT_ARG 6 +#define FLAT_SCRATCH_SEGMENT_SIZE_ARG 7 +#define WORKGROUP_ID_X_ARG 11 +#define PRIVATE_SEGMENT_WAVE_OFFSET_ARG 15 +#define WORK_ITEM_ID_Y_ARG 17 +#define WORK_ITEM_ID_Z_ARG 18 + +static const struct gcn_kernel_arg_type gcn_kernel_arg_types[] = +{ + {"exec", NULL, DImode, EXEC_REG}, + {"private_segment_buffer", "enable_sgpr_private_segment_buffer", TImode, -1}, + {"dispatch_ptr", "enable_sgpr_dispatch_ptr", DImode, -1}, + {"queue_ptr", "enable_sgpr_queue_ptr", DImode, -1}, + {"kernarg_segment_ptr", "enable_sgpr_kernarg_segment_ptr", DImode, -1}, + {"dispatch_id", "enable_sgpr_dispatch_id", DImode, -1}, + {"flat_scratch_init", "enable_sgpr_flat_scratch_init", DImode, -1}, + {"private_segment_size", "enable_sgpr_private_segment_size", SImode, -1}, + {"grid_workgroup_count_X", "enable_sgpr_grid_workgroup_count_x", SImode, -1}, + {"grid_workgroup_count_Y", "enable_sgpr_grid_workgroup_count_y", SImode, -1}, + {"grid_workgroup_count_Z", "enable_sgpr_grid_workgroup_count_z", SImode, -1}, + {"workgroup_id_X", NULL /*"enable_sgpr_workgroup_id_x"*/, SImode, -1}, + {"workgroup_id_Y", NULL /*"enable_sgpr_workgroup_id_y"*/, SImode, -1}, + {"workgroup_id_Z", NULL /*"enable_sgpr_workgroup_id_z"*/, SImode, -1}, + {"workgroup_info", NULL /*"enable_sgpr_workgroup_info"*/, SImode, -1}, + {"private_segment_wave_offset", + NULL /*"enable_sgpr_private_segment_wave_offset"*/, SImode, -1}, + {"work_item_id_X", NULL, V64SImode, FIRST_VGPR_REG}, + {"work_item_id_Y", NULL, V64SImode, FIRST_VGPR_REG + 1}, + {"work_item_id_Z", NULL, V64SImode, FIRST_VGPR_REG + 2}}; + +static bool +gcn_parse_amdgpu_hsa_kernel_attribute (struct gcn_kernel_args *args, tree list) +{ + bool err = false; + args->requested = (1 << PRIVATE_SEGMENT_BUFFER_ARG) + | (1 << FLAT_SCRATCH_INIT_ARG) + | (1 << KERNARG_SEGMENT_PTR) + | (1 << PRIVATE_SEGMENT_WAVE_OFFSET_ARG); + args->nargs = 0; + + for (int a = 0; a < GCN_KERNEL_ARG_TYPES; a++) + args->reg[a] = -1; + + for (; list; list = TREE_CHAIN (list)) + { + const char *str; + if (TREE_CODE (TREE_VALUE (list)) != STRING_CST) + { + error ("amdgpu_hsa_kernel attribute requires string constant " + "arguments"); + break; + } + str = TREE_STRING_POINTER (TREE_VALUE (list)); + int a; + for (a = 0; a < GCN_KERNEL_ARG_TYPES; a++) + { + if (!strcmp (str, gcn_kernel_arg_types[a].name)) + break; + } + if (a == GCN_KERNEL_ARG_TYPES) + { + error ("unknown specifier %s in amdgpu_hsa_kernel attribute", + str); + err = true; + break; + } + if (args->requested & (1<<a)) + { + error ("duplicated parameter specifier %s in amdgpu_hsa_kernel " + "attribute", + str); + err = true; + break; + } + args->requested |= (1<<a); + args->order[args->nargs++] = a; + } + args->requested |= (1 << WORKGROUP_ID_X_ARG); + + int sgpr_regno = FIRST_SGPR_REG; + for (int a = 0; a < GCN_KERNEL_ARG_TYPES; a++) + { + if (!(args->requested & (1<<a))) + continue; + + if (gcn_kernel_arg_types[a].fixed_regno > 0) + args->reg[a] = gcn_kernel_arg_types[a].fixed_regno; + else if (gcn_kernel_arg_types[a].mode == SImode) + args->reg[a] = sgpr_regno++; + else if (gcn_kernel_arg_types[a].mode == DImode) + { + args->reg[a] = sgpr_regno; + sgpr_regno += 2; + } + else if (gcn_kernel_arg_types[a].mode == TImode) + { + args->reg[a] = sgpr_regno; + sgpr_regno += 4; + } + else + gcc_unreachable (); + } + args->nsgprs = sgpr_regno; + if 
(sgpr_regno > FIRST_SGPR_REG + 16) + { + error ("too many arguments passed in sgpr registers"); + } + return err; +} + +static tree +gcn_handle_amdgpu_hsa_kernel_attribute (tree *node, tree name, + tree args, int, bool *no_add_attrs) +{ + if (TREE_CODE (*node) != FUNCTION_TYPE + && TREE_CODE (*node) != METHOD_TYPE + && TREE_CODE (*node) != FIELD_DECL + && TREE_CODE (*node) != TYPE_DECL) + { + warning (OPT_Wattributes, "%qE attribute only applies to functions", + name); + *no_add_attrs = true; + return NULL_TREE; + } + + /* Can combine regparm with all attributes but fastcall, and thiscall. */ + if (is_attribute_p ("gcnhsa_kernel", name)) + { + struct gcn_kernel_args kernelarg; + + if (gcn_parse_amdgpu_hsa_kernel_attribute (&kernelarg, args)) + *no_add_attrs = true; + + return NULL_TREE; + } + + return NULL_TREE; +} + +/* Define how to find the value returned by a function. + VALTYPE is the data type of the value (as a tree). + If the precise function being called is known, FUNC is its FUNCTION_DECL; + otherwise, FUNC is 0. */ + +static rtx +gcn_function_value (const_tree valtype, const_tree, bool) +{ + /* FIXME: There is no way to return value. + The value will be simply ignored. Perhaps generic code can be told + to handle NULL in this case. */ + return gen_rtx_REG (TYPE_MODE (valtype), SGPR_REGNO (0)); +} + +/* Return true if N is a possible register number of function value. */ + +static bool +gcn_function_value_regno_p (const unsigned int) +{ + return false; +} + +/* Find a location for the static chain incoming to a nested function. + This is a register, unless all free registers are used by arguments. */ + +static rtx +gcn_static_chain (const_tree, bool) +{ + /* FIXME: there is no function ABI, so we don't support nested + functions either. */ + return NULL; +} + + +static rtx +gcn_function_arg (cumulative_args_t cum_v, machine_mode mode, const_tree type, + bool) +{ + CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); + if (cum->num >= cum->args.nargs) + { + cum->offset = (cum->offset + TYPE_ALIGN (type) / 8 - 1) + & -(TYPE_ALIGN (type) / 8); + cfun->machine->kernarg_segment_alignment + = MAX ((unsigned) cfun->machine->kernarg_segment_alignment, + TYPE_ALIGN (type) / 8); + rtx addr = gen_rtx_REG (DImode, cum->args.reg [KERNARG_SEGMENT_PTR]); + if (cum->offset) + addr = gen_rtx_PLUS (DImode, addr, gen_int_mode (cum->offset, mode)); + rtx mem = gen_rtx_MEM (mode, addr); + set_mem_attributes (mem, const_cast<tree>(type), 1); + set_mem_addr_space (mem, ADDR_SPACE_SCALAR_FLAT); + MEM_READONLY_P (mem) = 1; + return mem; + } + + int a = cum->args.order[cum->num]; + if (mode != gcn_kernel_arg_types[a].mode) + { + error ("wrong type of argument %s", gcn_kernel_arg_types[a].name); + return 0; + } + return gen_rtx_REG ((machine_mode)gcn_kernel_arg_types[a].mode, + cum->args.reg[a]); +} + +static void +gcn_function_arg_advance (cumulative_args_t cum_v, machine_mode, + const_tree type, bool) +{ + CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); + if (cum->num < cum->args.nargs) + cum->num++; + else + { + cum->offset += tree_to_uhwi (TYPE_SIZE_UNIT (type)); + cfun->machine->kernarg_segment_byte_size = cum->offset; + } +} + + +/* Initialize a variable CUM of type CUMULATIVE_ARGS for a call to a function + whose data type is FNTYPE. For a library call, FNTYPE is 0. 
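
The kernarg handling above is a plain align-then-advance layout over the kernarg segment. A standalone sketch with hypothetical argument sizes (int, double, char), not taken from any real kernel:

#include <stdio.h>

/* Round OFFSET up to ALIGN bytes, as gcn_function_arg does above.  */
static unsigned
align_up (unsigned offset, unsigned align)
{
  return (offset + align - 1) & -align;
}

int
main (void)
{
  /* Hypothetical kernel arguments: int, double, char.  */
  unsigned sizes[]  = { 4, 8, 1 };
  unsigned aligns[] = { 4, 8, 1 };
  unsigned offset = 0;

  for (int i = 0; i < 3; i++)
    {
      offset = align_up (offset, aligns[i]);
      printf ("arg %d at kernarg offset %u\n", i, offset);
      offset += sizes[i];
    }
  /* Prints offsets 0, 8 and 16 for this example.  */
  return 0;
}
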
*/ + +void +gcn_init_cumulative_args (CUMULATIVE_ARGS *cum /* Argument info to init */, + tree fntype /* tree ptr for function decl */, + rtx libname /* SYMBOL_REF of library name or 0 */, + tree fndecl, + int caller) +{ + memset (cum, 0, sizeof (*cum)); + if (libname) + { + sorry ("no one decided on calling convention yet: can not expand " + "libcall %s0", XSTR (libname, 0)); + return; + } + tree attr = NULL; + if (fndecl) + attr = lookup_attribute ("amdgpu_hsa_kernel", DECL_ATTRIBUTES (fndecl)); + if (fndecl && !attr) + attr = lookup_attribute ("amdgpu_hsa_kernel", + TYPE_ATTRIBUTES (TREE_TYPE (fndecl))); + if (!attr && fntype) + attr = lookup_attribute ("amdgpu_hsa_kernel", TYPE_ATTRIBUTES (fntype)); + /* Handle main () as kernel, so we can run testsuite. */ + if (!attr && !caller && fndecl && MAIN_NAME_P (DECL_NAME (fndecl))) + gcn_parse_amdgpu_hsa_kernel_attribute (&cum->args, NULL_TREE); + else + { + if (!attr || caller) + { + sorry ("no one decided on calling convention yet; please just do " + "not call functions"); + return; + } + gcn_parse_amdgpu_hsa_kernel_attribute + (&cum->args, TREE_VALUE (attr)); + } + cfun->machine->args = cum->args; + /* We changed regno of frame pointer. */ + init_emit_regs (); + init_regs (); +} + +/* Expanders. */ + +/* Return N-th part of value occupying multiple registers. */ + +rtx +gcn_operand_part (enum machine_mode mode, rtx op, int n) +{ + if (mode == V64DImode || mode == V64SImode) + { + if (REG_P (op)) + { + gcc_assert (REGNO (op) + n < FIRST_PSEUDO_REGISTER); + return gen_rtx_REG (V64SImode, REGNO (op) + n); + } + if (GET_CODE (op) == CONST_VECTOR) + { + int units = GET_MODE_NUNITS (mode); + rtvec v = rtvec_alloc (units); + + for (int i = 0; i < units; ++i) + RTVEC_ELT (v, i) = gcn_operand_part (GET_MODE_INNER (mode), + CONST_VECTOR_ELT (op, i), n); + + return gen_rtx_CONST_VECTOR (mode, v); + } + if (GET_CODE (op) == UNSPEC + && XINT (op, 1) == UNSPEC_VECTOR) + return gcn_gen_undef (V64SImode); + gcc_unreachable (); + } + else + { + if (GET_CODE (op) == UNSPEC + && XINT (op, 1) == UNSPEC_VECTOR) + return gcn_gen_undef (SImode); + return simplify_gen_subreg (SImode, op, mode, n * 4); + } +} + +/* Return N-th part of value occupying multiple registers. */ + +rtx +gcn_operand_doublepart (enum machine_mode mode, rtx op, int n) +{ + return simplify_gen_subreg (DImode, op, mode, n * 8); +} + +/* Split all operands in OPERANDS into parts so first NOPERANDS corresponds + to the first part and so on. */ + +void +gcn_split_operands (enum machine_mode mode, rtx *operands, + int nparts, int noperands) +{ + for (int i = nparts - 1; i >= 0; i--) + for (int j = 0; j < noperands; j++) + operands [i*noperands + j] = gcn_operand_part (mode, operands[j], i); +} + +/* Return true if OP can be splitted. */ + +bool +gcn_can_split_p (enum machine_mode, rtx op) +{ + if (vgpr_vector_mode_p (GET_MODE (op))) + { + if (GET_CODE (op) == SUBREG) + op = SUBREG_REG (op); + if (!REG_P (op)) + return true; + return REGNO (op) <= FIRST_PSEUDO_REGISTER; + } + return true; +} + +/* Return true if all of OPERANDS can be splitted by gcnsplit_operands. 
*/ + +bool +gcn_can_split_operands_p (enum machine_mode mode, rtx *operands, + int noperands) +{ + for (int i = noperands - 1; i >= 0; i--) + if (!gcn_can_split_p (mode, operands[i])) + return false; + return true; +} + +rtx +gcn_gen_undef (enum machine_mode mode) +{ + return gen_rtx_UNSPEC (mode, gen_rtvec (1, const0_rtx), UNSPEC_VECTOR); +} + +rtx +gcn_vec_constant (enum machine_mode mode, int a) +{ + /*if (!a) + return CONST0_RTX (mode); + if (a == -1) + return CONSTM1_RTX (mode); + if (a == 1) + return CONST1_RTX (mode); + if (a == 2) + return CONST2_RTX (mode);*/ + + int units = GET_MODE_NUNITS (mode); + rtx tem = gen_int_mode (a, GET_MODE_INNER (mode)); + rtvec v = rtvec_alloc (units); + + for (int i = 0; i < units; ++i) + RTVEC_ELT (v, i) = tem; + + return gen_rtx_CONST_VECTOR (mode, v); +} + +rtx +gcn_vec_constant (enum machine_mode mode, rtx a) +{ + int units = GET_MODE_NUNITS (mode); + rtvec v = rtvec_alloc (units); + + for (int i = 0; i < units; ++i) + RTVEC_ELT (v, i) = a; + + return gen_rtx_CONST_VECTOR (mode, v); +} + +static rtx +get_exec (int64_t val) +{ + rtx reg = gen_reg_rtx (DImode); + emit_insn (gen_rtx_SET (reg, gen_int_mode (val, DImode))); + return reg; +} + +static rtx +get_exec (rtx val) +{ + rtx reg = gen_reg_rtx (DImode); + emit_insn (gen_rtx_SET (reg, val)); + return reg; +} + +/* Generate move which uses the exec flags. */ + +rtx +gen_mov_with_exec (rtx op0, rtx op1, rtx exec = NULL) +{ + enum machine_mode mode = GET_MODE (op0); + if (vgpr_vector_mode_p (mode)) + return (gen_rtx_SET (op0, + gen_rtx_VEC_MERGE (mode, op1, gcn_gen_undef (mode), + exec))); + return (gen_rtx_PARALLEL + (VOIDmode, + gen_rtvec (2, gen_rtx_SET (op0, op1), + gen_rtx_USE (VOIDmode, exec ? exec + : gcn_scalar_exec ())))); +} + +/* Load vector constant where n-th lane contains BASE+n*VAL. */ + +static rtx +strided_constant (enum machine_mode mode, int base, int val) +{ + rtx x = gen_reg_rtx (mode); + emit_insn (gen_mov_with_exec (x, gcn_vec_constant (mode, base), + gcn_full_exec_reg ())); + emit_insn (gen_addv64si3_vector (x, x, gcn_vec_constant (mode, val * 32), + get_exec (0xffffffff00000000), x)); + emit_insn (gen_addv64si3_vector (x, x, gcn_vec_constant (mode, val * 16), + get_exec (0xffff0000ffff0000), x)); + emit_insn (gen_addv64si3_vector (x, x, gcn_vec_constant (mode, val * 8), + get_exec (0xff00ff00ff00ff00), x)); + emit_insn (gen_addv64si3_vector (x, x, gcn_vec_constant (mode, val * 4), + get_exec (0xf0f0f0f0f0f0f0f0), x)); + emit_insn (gen_addv64si3_vector (x, x, gcn_vec_constant (mode, val * 2), + get_exec (0xcccccccccccccccc), x)); + emit_insn (gen_addv64si3_vector (x, x, gcn_vec_constant (mode, val * 1), + get_exec (0xaaaaaaaaaaaaaaaa), x)); + return x; +} + +/* Prepare address to load vector of MODE from address OP. */ + +static rtx +prepare_flat_address (enum machine_mode mode, rtx op, rtx exec) +{ + rtx addrs = gen_reg_rtx (V64DImode); + rtx base = force_reg (DImode, XEXP (op, 0)); + rtx offsets = strided_constant (V64SImode, 0, GET_MODE_UNIT_SIZE (mode)); + + emit_insn (gen_vec_duplicatev64di + (addrs, base, exec, gcn_gen_undef (V64DImode))); + + emit_insn (gen_addv64di3_zext (addrs, offsets, addrs, exec, + gcn_gen_undef (V64DImode))); + return change_address (op, GET_MODE (op), addrs); +} + +/* Legitmize address X. 
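
strided_constant above fills a vector so that lane n holds BASE + n*VAL using six masked additions whose masks halve in period at each step. A plain-C model of the same trick (lanes as an array, the exec masks as 64-bit literals) that can be compiled to verify the result:

#include <stdint.h>
#include <stdio.h>

/* After six masked adds every lane i holds BASE + i * VAL.  The
   masks match the ones used by strided_constant above.  */
int
main (void)
{
  const int base = 0, val = 1;   /* the lane-index vector 0..63 */
  int lane[64];
  const uint64_t masks[6] = {
    0xffffffff00000000ull, 0xffff0000ffff0000ull, 0xff00ff00ff00ff00ull,
    0xf0f0f0f0f0f0f0f0ull, 0xccccccccccccccccull, 0xaaaaaaaaaaaaaaaaull
  };

  for (int i = 0; i < 64; i++)
    lane[i] = base;
  for (int step = 0; step < 6; step++)
    {
      int add = val << (5 - step);       /* 32, 16, 8, 4, 2, 1 times VAL */
      for (int i = 0; i < 64; i++)
        if (masks[step] >> i & 1)
          lane[i] += add;
    }
  for (int i = 0; i < 64; i++)
    if (lane[i] != base + i * val)
      printf ("mismatch at lane %d\n", i);
  return 0;
}
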
*/ + +static rtx +gcn_addr_space_legitimize_address (rtx x, rtx, machine_mode mode, + addr_space_t as) +{ + switch (gcn_addr_space_type (as)) + { + case GCN_ADDR_LOAD: + case GCN_ADDR_STORE: + case GCN_ADDR_BUFFERED: + return x; + break; + case GCN_ADDR_FLAT: + if (vgpr_vector_mode_p (mode) + && GET_MODE (x) != V64DImode) + { + rtx exec = gcn_full_exec_reg (); + rtx addrs = gen_reg_rtx (V64DImode); + rtx base = force_reg (DImode, x); + rtx offsets = strided_constant (V64SImode, 0, + GET_MODE_UNIT_SIZE (mode)); + + emit_insn (gen_vec_duplicatev64di + (addrs, base, exec, gcn_gen_undef (V64DImode))); + + emit_insn (gen_addv64di3_zext (addrs, offsets, addrs, exec, + gcn_gen_undef (V64DImode))); + return addrs; + } + case GCN_ADDR_DS: + /* FIXME: LDS support offsets, handle them!. */ + if (vgpr_vector_mode_p (mode) + && GET_MODE (x) != V64SImode) + { + rtx exec = gcn_full_exec_reg (); + rtx addrs = gen_reg_rtx (V64SImode); + rtx base = force_reg (SImode, x); + rtx offsets = strided_constant (V64SImode, 0, + GET_MODE_UNIT_SIZE (mode)); + + emit_insn (gen_vec_duplicatev64si + (addrs, base, exec, gcn_gen_undef (V64SImode))); + + emit_insn (gen_addv64si3_vector (addrs, offsets, addrs, exec, + gcn_gen_undef (V64SImode))); + return addrs; + } + return x; + } + gcc_unreachable (); +} + +/* Move expander: move op1 to op0 in MODE. + Only handle special cases that can not be handled directly. + Return true if RTX was generated. */ + +bool +gcn_expand_mov (enum machine_mode mode, rtx op0, rtx op1) +{ + /* At least one of the operands needs to be a register. */ + if (!register_operand (op0, mode) && !register_operand (op1, mode)) + { + rtx temp = force_reg (mode, op1); + emit_move_insn (op0, temp); + return 1; + } + if (GET_MODE (op0) == QImode || GET_MODE (op0) == HImode) + { + /* We do not have sub-word register move patterns; just promote to + SImode. */ + if (!memory_operand (op0, mode) && !memory_operand (op1, mode)) + { + op0 = simplify_gen_subreg (SImode, op0, mode, 0); + op1 = simplify_gen_subreg (SImode, op1, mode, 0); + } + /* Scalar flat loads do not support sub-word accesses; emulate. 
+ Hardware ignored lower 3 bits of the address, so we need to load + aligned address and adjust */ + else if (GET_CODE (op1) == MEM + && MEM_ADDR_SPACE (op1) == ADDR_SPACE_SCALAR_FLAT) + { + rtx addr = force_reg (DImode, XEXP (op1, 0)); + int align = get_mem_align_offset (op1, 32); + rtx tmp = gen_reg_rtx (SImode); + + if (align >= 0) + { + rtx mem = widen_memory_access (op1, SImode, -align / 8); + emit_move_insn (tmp, mem); + if (align) + tmp + = expand_simple_binop (SImode, ASHIFTRT, tmp, + GEN_INT (align * 8), NULL_RTX, 1, + OPTAB_DIRECT); + emit_move_insn (simplify_gen_subreg (SImode, op0, mode, 0), + tmp); + } + else + { + rtx aligned = gen_reg_rtx (DImode); + emit_move_insn (addr, aligned); + emit_insn (gen_andsi3 (simplify_gen_subreg (SImode, aligned, + DImode, 0), + simplify_gen_subreg (SImode, aligned, + DImode, 0) , + gen_int_mode (-3, DImode))); + rtx mem = change_address (op1, SImode, aligned); + set_mem_align (mem, 4) ; + emit_move_insn (tmp, mem); + rtx offset + = expand_simple_binop (SImode, AND, + simplify_gen_subreg (SImode, addr, + DImode, 0), + gen_int_mode (3, DImode), + NULL_RTX, 1, OPTAB_DIRECT); + offset + = expand_simple_binop (SImode, ASHIFT, offset, + gen_int_mode (3, DImode), NULL_RTX, 1, + OPTAB_DIRECT); + rtx tmp2 + = expand_simple_binop (SImode, ASHIFTRT, tmp, offset, NULL_RTX, + 1, OPTAB_DIRECT); + emit_move_insn (simplify_gen_subreg (SImode, op0, mode, 0), tmp2); + } + return 1; + } + } + + if (GET_CODE (op1) == CONST_VECTOR && !gcn_constant_p (op1)) + { + gcn_expand_vector_init (op0, op1); + return 1; + } + bool reg_exec = gcn_vgpr_move_p (op0, op1); + /* Scalar flat load. */ + if (GET_CODE (op1) == MEM && MEM_ADDR_SPACE (op1) == ADDR_SPACE_SCALAR_FLAT) + return 0; + /* Scalar flat store. */ + if (GET_CODE (op0) == MEM && MEM_ADDR_SPACE (op0) == ADDR_SPACE_SCALAR_FLAT) + { + sorry ("scalar flat stores are not supported yet"); + return 0; + } + + rtx exec + = vgpr_vector_mode_p (mode) ? gcn_full_exec () : gcn_scalar_exec (); + if (reg_exec && !REG_P (exec)) + exec = get_exec (exec); + + /* Buffer load/stores for scratch memory segment. */ + if ((GET_CODE (op0) == MEM + && MEM_ADDR_SPACE (op0) == ADDR_SPACE_SCRATCH) + || (GET_CODE (op1) == MEM + && MEM_ADDR_SPACE (op1) == ADDR_SPACE_SCRATCH)) + { + emit_insn (gen_rtx_PARALLEL (VOIDmode, + gen_rtvec (3, gen_rtx_SET (op0, op1), + gen_rtx_USE (VOIDmode, + gen_rtx_REG + (TImode, 0)), + gen_rtx_USE (VOIDmode, exec)))); + return 1; + } + /* Stores to hard registers can be optimized because we know if there + will be a need for exec or not. */ + if (gcn_sgpr_register_operand (op0, mode) + && (gcn_sgpr_register_operand (op1, mode) + || !register_operand (op1, mode))) + return 0; + + /* LRA needs to have memory among the altenratives. Arrange this by always + expanidng buffer load/store pattern that also allow reg-reg moves. */ + if (lra_in_progress && !vgpr_vector_mode_p (mode) && !reg_exec) + { + emit_insn (gen_rtx_PARALLEL (VOIDmode, + gen_rtvec (4, gen_rtx_SET (op0, op1), + gen_rtx_USE (VOIDmode, + gen_rtx_REG + (TImode, 0)), + gen_rtx_USE (VOIDmode, exec), + gen_rtx_CLOBBER (VOIDmode, + gen_reg_rtx + (DImode))))); + return 1; + } + emit_insn (gen_mov_with_exec (op0, op1, exec)); + return 1; +} + +/* Generate masked move. */ + +rtx +gen_masked_mov (rtx op0, rtx op1, rtx exec) +{ + return (gen_rtx_SET (op0, + gen_rtx_VEC_MERGE (GET_MODE (op0), + op1, op0, exec))); +} + +/* Generate masked move. 
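
The aligned-load-and-shift emulation above can be pictured in ordinary C: fetch the 32-bit word containing the address and shift the wanted byte down. This is only a conceptual model (little-endian byte order assumed), not the RTL the compiler emits:

#include <stdint.h>
#include <string.h>

/* Conceptual model of the sub-word emulation: the scalar unit only
   loads aligned 32-bit words, so read the word containing ADDR and
   extract the byte from it.  */
static uint8_t
emulated_byte_load (const uint8_t *mem, uintptr_t addr)
{
  uintptr_t aligned = addr & ~(uintptr_t) 3;   /* containing dword */
  uint32_t word;

  memcpy (&word, mem + aligned, 4);            /* aligned 32-bit load */
  return (uint8_t) (word >> ((addr & 3) * 8)); /* shift byte into place */
}
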
*/ + +rtx +gen_masked_scalar_load (rtx op0, rtx op1, rtx op2, rtx exec) +{ + return (gen_rtx_SET (op0, + gen_rtx_VEC_MERGE (GET_MODE (op0), + gen_rtx_VEC_DUPLICATE (GET_MODE (op0), + op1), + op2, exec))); +} + +/* Expand vector init of OP0 by VEC. */ + +void +gcn_expand_vector_init (rtx op0, rtx vec) +{ + int64_t initialized_mask = 0; + int64_t curr_mask = 1; + machine_mode mode = GET_MODE (op0); + + rtx val = XVECEXP (vec, 0, 0); + + for (int i = 1; i < 64; i++) + if (rtx_equal_p (val, XVECEXP (vec, 0, i))) + curr_mask |= (int64_t)1 << i; + + if (gcn_constant_p (val)) + emit_insn (gen_mov_with_exec (op0, gcn_vec_constant (mode, val), + gcn_full_exec_reg ())); + else + { + val = force_reg (GET_MODE_INNER (mode), val); + emit_insn (gen_masked_scalar_load (op0, val, gcn_gen_undef (mode), + gcn_full_exec_reg ())); + } + initialized_mask |= curr_mask; + for (int i = 1; i < 64; i++) + if (!(initialized_mask & ((int64_t)1 << i))) + { + curr_mask = (int64_t)1 << i; + rtx val = XVECEXP (vec, 0, i); + + for (int j = i + 1; j < 64; j++) + if (rtx_equal_p (val, XVECEXP (vec, 0, j))) + curr_mask |= (int64_t)1 << j; + if (gcn_constant_p (val)) + emit_insn (gen_masked_mov (op0, gcn_vec_constant (mode, val), + get_exec (curr_mask))); + else + { + val = force_reg (GET_MODE_INNER (mode), val); + emit_insn (gen_masked_scalar_load (op0, val, op0, + get_exec (curr_mask))); + } + initialized_mask |= curr_mask; + } +} + +/* Return true if move from OP0 to OP1 is known to be executed in vector + unit. */ + +bool +gcn_vgpr_move_p (rtx op0, rtx op1) +{ + if (MEM_P (op0) && MEM_ADDR_SPACE (op0) != ADDR_SPACE_SCALAR_FLAT) + return true; + if (MEM_P (op1) && MEM_ADDR_SPACE (op1) != ADDR_SPACE_SCALAR_FLAT) + return true; + return ((REG_P (op0) && VGPR_REGNO_P (REGNO (op0))) + || (REG_P (op1) && VGPR_REGNO_P (REGNO (op1))) + || vgpr_vector_mode_p (GET_MODE (op0))); +} + +bool +gcn_sgpr_move_p (rtx op0, rtx op1) +{ + if (MEM_P (op0) && MEM_ADDR_SPACE (op0) == ADDR_SPACE_SCALAR_FLAT) + return true; + if (MEM_P (op1) && MEM_ADDR_SPACE (op1) == ADDR_SPACE_SCALAR_FLAT) + return true; + if (!REG_P (op0) || REGNO (op0) > FIRST_PSEUDO_REGISTER + || VGPR_REGNO_P (REGNO (op0))) + return false; + if (REG_P (op1) + && REGNO (op1) < FIRST_PSEUDO_REGISTER + && !VGPR_REGNO_P (REGNO (op1))) + return true; + return immediate_operand (op1, VOIDmode) || memory_operand (op1, VOIDmode); +} + +static void +output_file_start (void) +{ + fprintf (asm_out_file, "\t.hsatext\n"); + fprintf (asm_out_file, "\t.hsa_code_object_version 2,0\n"); + fprintf (asm_out_file, "\t.hsa_code_object_isa\n"); /* Autodetect. */ + fprintf (asm_out_file, "\t.section\t.AMDGPU.config\n"); + fprintf (asm_out_file, "\t.hsatext\n"); +} + +/* This function produces the initial definition of a function name. */ + +void +gcn_hsa_declare_function_name (FILE *file, const char *name, tree) +{ + int sgpr, vgpr; + bool xnack_enabled = false; + int extra_regs = 0; + + /* Determine count of sgpr/vgpr registers by looking for last + one used. 
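
gcn_expand_vector_init above groups lanes that hold the same value into one exec mask, so each distinct value costs a single masked move. A plain-C model of that grouping, using illustrative data rather than RTL:

#include <stdint.h>
#include <stdio.h>

int
main (void)
{
  int vec[64], moves = 0;
  for (int i = 0; i < 64; i++)
    vec[i] = i % 4;                      /* four distinct values */

  uint64_t initialized = 0;
  for (int i = 0; i < 64; i++)
    {
      if (initialized >> i & 1)
        continue;
      uint64_t mask = 0;
      for (int j = i; j < 64; j++)
        if (vec[j] == vec[i])
          mask |= (uint64_t) 1 << j;
      /* One masked move covers every lane in MASK.  */
      moves++;
      initialized |= mask;
    }
  printf ("%d masked moves for 64 lanes\n", moves);   /* prints 4 */
  return 0;
}
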
*/ + for (sgpr = 101; sgpr >= 0; sgpr--) + if (df_regs_ever_live_p (FIRST_SGPR_REG + sgpr)) + break; + sgpr++; + for (vgpr = 255; vgpr >= 0; vgpr--) + if (df_regs_ever_live_p (FIRST_VGPR_REG + vgpr)) + break; + vgpr++; + + if (xnack_enabled) + extra_regs = 6; + if (df_regs_ever_live_p (FLAT_SCRATCH_LO_REG) + || df_regs_ever_live_p (FLAT_SCRATCH_HI_REG)) + extra_regs = 4; + else if (df_regs_ever_live_p (VCC_LO_REG) || df_regs_ever_live_p (VCC_HI_REG)) + extra_regs = 2; + + fprintf (file, "\t.type\t%s,@function\n", name); + fprintf (file, "\t.amdgpu_hsa_kernel\t%s\n", name); + fprintf (file, "%s:\n", name); + fprintf (file, "\t.amd_kernel_code_t\n" + "\t\tkernel_code_version_major = 1\n" + "\t\tkernel_code_version_minor = 0\n" + "\t\tmachine_kind = 1\n" + "\t\tmachine_version_major = 8\n" + "\t\tmachine_version_minor = 0\n" + "\t\tmachine_version_stepping = 1\n" + "\t\tkernel_code_entry_byte_offset = 256\n" + "\t\tkernel_code_prefetch_byte_size = 0\n" + "\t\tmax_scratch_backing_memory_byte_size = 0\n" + "\t\tcompute_pgm_rsrc1_vgprs = %i\n" + "\t\tcompute_pgm_rsrc1_sgprs = %i\n" + "\t\tcompute_pgm_rsrc1_priority = 0\n" + "\t\tcompute_pgm_rsrc1_float_mode = 192\n" + "\t\tcompute_pgm_rsrc1_priv = 0\n" + "\t\tcompute_pgm_rsrc1_dx10_clamp = 1\n" + "\t\tcompute_pgm_rsrc1_debug_mode = 0\n" + "\t\tcompute_pgm_rsrc1_ieee_mode = 1\n" + /* We enable scratch memory. */ + "\t\tcompute_pgm_rsrc2_scratch_en = 1\n" + "\t\tcompute_pgm_rsrc2_user_sgpr = %i\n" + "\t\tcompute_pgm_rsrc2_tgid_x_en = 1\n" + "\t\tcompute_pgm_rsrc2_tgid_y_en = 0\n" + "\t\tcompute_pgm_rsrc2_tgid_z_en = 0\n" + "\t\tcompute_pgm_rsrc2_tg_size_en = 0\n" + "\t\tcompute_pgm_rsrc2_tidig_comp_cnt = 0\n" + "\t\tcompute_pgm_rsrc2_excp_en_msb = 0\n" + "\t\tcompute_pgm_rsrc2_lds_size = 8\n" /*FIXME */ + "\t\tcompute_pgm_rsrc2_excp_en = 0\n", + (vgpr - 1) / 4, + /* Must match wavefront_sgpr_count */ + (sgpr + extra_regs - 1) / 8, + /* The total number of SGPR user data registers requested. This + number must match the number of user data registers enabled. */ + cfun->machine->args.nsgprs - 2); + for (int a = 0; a < GCN_KERNEL_ARG_TYPES; a++) + if (gcn_kernel_arg_types[a].header_pseudo) + fprintf (file, "\t\t%s = %i\n", + gcn_kernel_arg_types[a].header_pseudo, + (cfun->machine->args.requested & (1<<a)) != 0); + /*fprintf (file, "\t\tenable_vgpr_workitem_id = %i\n", + (cfun->machine->args.requested & (1 << WORK_ITEM_ID_Z_ARG)) + ? 2 + : cfun->machine->args.requested & (1 << WORK_ITEM_ID_Y_ARG) + ? 
1 : 0);*/ + fprintf (file, "\t\tenable_ordered_append_gds = 0\n" + "\t\tprivate_element_size = 1\n" + "\t\tis_ptr64 = 1\n" + "\t\tis_dynamic_callstack = 0\n" + "\t\tis_debug_enabled = 0\n" + "\t\tis_xnack_enabled = %i\n" + "\t\tworkitem_private_segment_byte_size = %i\n" + "\t\tworkgroup_group_segment_byte_size = 0\n" + "\t\tgds_segment_byte_size = 0\n" + "\t\tkernarg_segment_byte_size = %i\n" + "\t\tworkgroup_fbarrier_count = 0\n" + "\t\twavefront_sgpr_count = %i\n" + "\t\tworkitem_vgpr_count = %i\n" + "\t\treserved_vgpr_first = 0\n" + "\t\treserved_vgpr_count = 0\n" + "\t\treserved_sgpr_first = 0\n" + "\t\treserved_sgpr_count = 0\n" + "\t\tdebug_wavefront_private_segment_offset_sgpr = 0\n" + "\t\tdebug_private_segment_buffer_sgpr = 0\n" + "\t\tkernarg_segment_alignment = %i\n" + "\t\tgroup_segment_alignment = 4\n" + "\t\tprivate_segment_alignment = %i\n" + "\t\twavefront_size = 6\n" + "\t\tcall_convention = 0\n" + "\t\truntime_loader_kernel_symbol = 0\n" + "\t.end_amd_kernel_code_t\n", + xnack_enabled, + (int)get_frame_size (), + cfun->machine->kernarg_segment_byte_size, + /* Number of scalar registers used by a wavefront. This + includes the special SGPRs for VCC, Flat Scratch (Base, + Size) and XNACK (for GFX8 (VI)+). It does not include the + 16 SGPR added if a trap handler is enabled. Must match + compute_pgm_rsrc1.sgprs. */ + sgpr + extra_regs, vgpr, + cfun->machine->kernarg_segment_alignment, + crtl->stack_alignment_needed / 8); +} + +/* Generate prologue. */ + +void +gcn_expand_prologue () +{ + /* Flat access to LDS requires apperture setup and M0 register setup. */ + emit_insn (gen_rtx_SET (gen_rtx_REG (SImode, M0_REG), + gen_int_mode (65536, SImode))); + /* Flat access also need flat_scratch register initialized. */ + if (cfun->machine->args.reg[FLAT_SCRATCH_INIT_ARG] > 0) + { + rtx arg = gen_rtx_REG (DImode, + cfun->machine->args.reg[FLAT_SCRATCH_INIT_ARG]); + emit_insn (gen_lshrsi3_scalar (gcn_operand_part + (DImode, gen_rtx_REG (DImode, + FLAT_SCRATCH_REG), + 1), + gcn_operand_part (DImode, arg, 0), + gen_int_mode (8, SImode))); + emit_insn (gen_rtx_SET (gcn_operand_part (DImode, + gen_rtx_REG (DImode, + FLAT_SCRATCH_REG), + 0), + gcn_operand_part (DImode, arg, 1))); + } + if (df_regs_ever_live_p (HARD_FRAME_POINTER_REGNUM)) + emit_insn (gen_rtx_SET (gen_rtx_REG (SImode, HARD_FRAME_POINTER_REGNUM), + gen_rtx_REG (SImode, + cfun->machine->args.reg + [PRIVATE_SEGMENT_WAVE_OFFSET_ARG]))); + emit_insn (gen_prologue_use (gen_rtx_REG (SImode, M0_REG))); +} + +/* Compute a (partial) cost for rtx X. Return true if the complete + cost has been computed, and false if subexpressions should be + scanned. In either case, *TOTAL contains the cost result. */ + +static bool +gcn_rtx_costs (rtx x, machine_mode, int, int, int *total, bool) +{ + enum rtx_code code = GET_CODE (x); + switch (code) + { + case CONST: + case CONST_DOUBLE: + case CONST_VECTOR: + case CONST_INT: + if (gcn_inline_constant_p (x)) + *total = 0; + if (code == CONST_INT + && ((unsigned HOST_WIDE_INT) INTVAL (x) + 0x8000) < 0x10000) + *total = 1; + else if (gcn_constant_p (x)) + *total = 2; + else + *total = vgpr_vector_mode_p (GET_MODE (x)) ? 64 : 4; + return true; + + default: + *total = 3; + return false; + } +} + +/* Return the cost of moving data of mode M between a + register and memory. A value of 2 is the default; this cost is + relative to those in `REGISTER_MOVE_COST'. + + This function is used extensively by register_move_cost that is used to + build tables at startup. Make it inline in this case. 
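
The register counts in the directives above are encoded with a granularity of 4 VGPRs and 8 SGPRs, and wavefront_size is a power-of-two exponent (6, i.e. 64 lanes). A quick standalone check of that arithmetic, with made-up register counts:

#include <stdio.h>

int
main (void)
{
  /* Hypothetical register usage for one kernel.  */
  int vgpr = 9, sgpr = 14, extra = 2;   /* extra = the VCC pair, say */

  /* Fields emitted into .amd_kernel_code_t above.  */
  printf ("compute_pgm_rsrc1_vgprs = %d\n", (vgpr - 1) / 4);
  printf ("compute_pgm_rsrc1_sgprs = %d\n", (sgpr + extra - 1) / 8);
  printf ("wavefront_sgpr_count    = %d\n", sgpr + extra);
  printf ("workitem_vgpr_count     = %d\n", vgpr);
  printf ("lanes per wavefront     = %d\n", 1 << 6);
  return 0;
}
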
+ When IN is 2, return maximum of in and out move cost. + + If moving between registers and memory is more expensive than + between two registers, you should define this macro to express the + relative cost. + + Model also increased moving costs of QImode registers in non + Q_REGS classes. + */ + +#define LOAD_COST 32 +#define STORE_COST 32 +static int +gcn_memory_move_cost (machine_mode mode, reg_class_t regclass, bool in) +{ + int nregs = CEIL (GET_MODE_SIZE (mode), 4); + switch (regclass) + { + case SCC_CONDITIONAL_REG: + case VCCZ_CONDITIONAL_REG: + case VCC_CONDITIONAL_REG: + case EXECZ_CONDITIONAL_REG: + case ALL_CONDITIONAL_REGS: + case SGPR_REGS: + case SGPR_EXEC_REGS: + case EXEC_MASK_REG: + case SGPR_VOP3A_SRC_REGS: + case SGPR_MEM_SRC_REGS: + case SGPR_SRC_REGS: + case SGPR_DST_REGS: + case GENERAL_REGS: + if (!in) + return (STORE_COST+2) * nregs; + return LOAD_COST * nregs; + case VGPR_REGS: + if (in) + return (LOAD_COST+2) * nregs; + return STORE_COST * nregs; + case ALL_REGS: + case SRCDST_REGS: + if (in) + return (LOAD_COST+2) * nregs; + return (STORE_COST + 2) * nregs; + default: + gcc_unreachable (); + } +} + +/* Return the cost of moving data from a register in class CLASS1 to + one in class CLASS2. Base value is 2. */ +static int +gcn_register_move_cost (machine_mode, reg_class_t dst, + reg_class_t src) +{ + /* Increase cost of moving from and to vector registers. While this is + fast in hardware (I think), it has hidden cost of setting up the exec + flags. */ + if ((src < VGPR_REGS) != (dst < VGPR_REGS)) + return 4; + return 2; +} + +/* Return class of registers which could be used for pseudo of MODE + and of class RCLASS for spilling instead of memory. Return NO_REGS + if it is not possible or non-profitable. */ + +static reg_class_t +gcn_spill_class (reg_class_t /*c*/, machine_mode /*mode*/) +{ + return SGPR_REGS; +/* + return NO_REGS; + if (mode == DImode) + return SGPR_REGS; + return c;*/ + /*return ALL_REGS;*/ +} + +/* Change allocno class for given pseudo from allocno and best class calculated + by IRA. + + Be sure we do not try to spill into memory, since this is not supported + (yet). */ + +static reg_class_t +gcn_ira_change_pseudo_allocno_class (int, reg_class_t cl, reg_class_t) +{ + /*if (cl == NO_REGS) + return VGPR_REGS;*/ + return cl; +} + +/* Target hook for scalar_mode_supported_p. */ + +static bool +gcn_scalar_mode_supported_p (machine_mode mode) +{ + return default_scalar_mode_supported_p (mode); +} + +/* Implements target hook vector_mode_supported_p. */ + +static bool +gcn_vector_mode_supported_p (machine_mode mode) +{ + return mode == V64SImode || mode == V64DImode || mode == V64SFmode; +} + +/* Initialize machine_function. */ + +static struct machine_function * +gcn_init_machine_status (void) +{ + struct machine_function *f; + + f = ggc_cleared_alloc<machine_function> (); + f->exec_reg = NULL_RTX; + return f; +} + +/* Implement the TARGET_OPTION_OVERRIDE hook. */ + +static void +gcn_option_override (void) +{ + init_machine_status = gcn_init_machine_status; +} + +/* Return pseudo holding default exec register. */ + +rtx +gcn_default_exec () +{ + if (cfun->machine->exec_reg) + { + /* During RTX init we are called with no function CFG built. 
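
To make the cost model above concrete: a DImode value occupies two 32-bit registers, so its memory-move costs scale by nregs = 2. A standalone recomputation using the same LOAD_COST/STORE_COST values:

#include <stdio.h>

#define LOAD_COST  32
#define STORE_COST 32

int
main (void)
{
  /* DImode: 8 bytes -> 2 registers worth of data.  */
  int nregs = (8 + 3) / 4;

  /* Costs as gcn_memory_move_cost above would compute them.  */
  printf ("load  DImode into SGPR_REGS: %d\n", LOAD_COST * nregs);
  printf ("store DImode from SGPR_REGS: %d\n", (STORE_COST + 2) * nregs);
  printf ("load  DImode into VGPR_REGS: %d\n", (LOAD_COST + 2) * nregs);
  return 0;
}
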
*/ + if (!cfun->machine->exec_reg_init + && cfun->cfg && 0) + { + cfun->machine->exec_reg_init = true; + } + return cfun->machine->exec_reg; + } + else + { + cfun->machine->exec_reg = gen_reg_rtx (DImode); + if (cfun->cfg && 0) + { + emit_insn_before (gen_rtx_SET (cfun->machine->exec_reg, + gen_rtx_REG (DImode, EXEC_REG)), + NEXT_INSN (entry_of_function ())); + cfun->machine->exec_reg_init = true; + } + } + return cfun->machine->exec_reg; +} + +/* Return value of scalar exec register. */ + +rtx +gcn_scalar_exec () +{ + return const1_rtx; +} + +/* Return value of full exec register. */ + +rtx +gcn_full_exec () +{ + return constm1_rtx; +} + +/* Return pseudo holding full exec register. */ + +rtx +gcn_full_exec_reg () +{ + return get_exec (-1); +} + +/* Set live registers passed to the kernel. */ + +static void +gcn_live_on_entry (bitmap regs) +{ + /* This register holds the default exec mask. */ + bitmap_set_bit (regs, EXEC_LO_REG); + bitmap_set_bit (regs, EXEC_HI_REG); + /* This register holds vector containing value N at N-th lane + for every lane where exec mask is set. */ + bitmap_set_bit (regs, FIRST_VGPR_REG); +} + +/* Table of valid machine attributes. */ +static const struct attribute_spec gcn_attribute_table[] = +{ + /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler, + affects_type_identity } */ + { "amdgpu_hsa_kernel", 0, GCN_KERNEL_ARG_TYPES, false, true, true, + gcn_handle_amdgpu_hsa_kernel_attribute, true }, + /* End element. */ + { NULL, 0, 0, false, false, false, NULL, false } +}; + +/* When this hook returns true for MODE, the compiler allows + registers explicitly used in the rtl to be used as spill registers + but prevents the compiler from extending the lifetime of these + registers. */ + +bool +gcn_small_register_classes_for_mode_p (machine_mode mode) +{ + /* We allocate into exec and vcc regs. Those make small register class. */ + return mode == DImode || mode == SImode; +} + +/* Vector registers are wide and we can not directly subreg into word + sized parts. */ + +int +gcn_regmode_natural_size (enum machine_mode mode) +{ + if (vgpr_vector_mode_p (mode)) + return GET_MODE_SIZE (mode); + return 4; +} + +enum gcn_builtin_type_index +{ + GCN_BTI_END_OF_PARAMS, + + GCN_BTI_VOID, + GCN_BTI_EXEC, + + GCN_BTI_V64SI, + GCN_BTI_V64SF, + GCN_BTI_V64PTR, + GCN_BTI_SIPTR, + GCN_BTI_SFPTR, + + GCN_BTI_MAX +}; + +static GTY(()) tree gcn_builtin_types[GCN_BTI_MAX]; + +#define exec_type_node (gcn_builtin_types[GCN_BTI_EXEC]) +#define v64si_type_node (gcn_builtin_types[GCN_BTI_V64SI]) +#define v64sf_type_node (gcn_builtin_types[GCN_BTI_V64SF]) +#define v64ptr_type_node (gcn_builtin_types[GCN_BTI_V64PTR]) +#define siptr_type_node (gcn_builtin_types[GCN_BTI_SIPTR]) +#define sfptr_type_node (gcn_builtin_types[GCN_BTI_SFPTR]) + +static rtx gcn_expand_builtin_1 (tree, rtx, rtx, machine_mode, int, + struct gcn_builtin_description *); +static rtx gcn_expand_builtin_binop (tree, rtx, rtx, machine_mode, int, + struct gcn_builtin_description *); + +struct gcn_builtin_description; +typedef rtx (*gcn_builtin_expander) (tree, rtx, rtx, machine_mode, int, + struct gcn_builtin_description *); + +enum gcn_builtin_type +{ + B_UNIMPLEMENTED, /* Sorry out */ + B_INSN, /* Emit a pattern */ + B_OVERLOAD, /* Placeholder for an overloaded function */ +}; + +struct gcn_builtin_description +{ + int fcode; + int icode; + const char *name; + enum gcn_builtin_type type; + /* The first element of parm is always the return type. The rest + are a zero terminated list of parameters. 
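+     For instance, the parm array {GCN_BTI_V64SI, GCN_BTI_EXEC, GCN_BTI_V64SI,
+     GCN_BTI_V64SI, GCN_BTI_V64SI, GCN_BTI_END_OF_PARAMS} used by
+     DEF_BUILTIN_BINOP_INT_FP below describes a builtin returning a v64si
+     and taking (exec, v64si, v64si, v64si) arguments.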
*/ + int parm[6]; + gcn_builtin_expander expander; +}; + + +/* Codes for all the GCN builtins. */ + +enum gcn_builtin_codes +{ +#define DEF_BUILTIN(fcode, icode, name, type, params, expander) \ + GCN_BUILTIN_ ## fcode, +#define DEF_BUILTIN_BINOP_INT_FP(fcode, ic, name) \ + GCN_BUILTIN_ ## fcode ## _V64SI, \ + GCN_BUILTIN_ ## fcode ## _V64SI_unspec, +#include "gcn-builtins.def" +#undef DEF_BUILTIN +#undef DEF_BUILTIN_BINOP_INT_FP + GCN_BUILTIN_MAX +}; + +extern GTY(()) struct gcn_builtin_description gcn_builtins[GCN_BUILTIN_MAX]; + +struct gcn_builtin_description gcn_builtins[] = { +#define DEF_BUILTIN(fcode, icode, name, type, params, expander) \ + {GCN_BUILTIN_ ## fcode, icode, name, type, params, expander}, + +#define DEF_BUILTIN_BINOP_INT_FP(fcode, ic, name) \ + {GCN_BUILTIN_ ## fcode ## _V64SI, \ + CODE_FOR_ ## ic ##v64si3_vector, name "_v64int", B_INSN, \ + {GCN_BTI_V64SI, GCN_BTI_EXEC, GCN_BTI_V64SI, GCN_BTI_V64SI, \ + GCN_BTI_V64SI, GCN_BTI_END_OF_PARAMS}, gcn_expand_builtin_binop}, \ + {GCN_BUILTIN_ ## fcode ## _V64SI_unspec, \ + CODE_FOR_ ## ic ##v64si3_vector, name "_v64int_unspec", B_INSN, \ + {GCN_BTI_V64SI, GCN_BTI_EXEC, GCN_BTI_V64SI, GCN_BTI_V64SI, \ + GCN_BTI_END_OF_PARAMS}, gcn_expand_builtin_binop}, + +#include "gcn-builtins.def" +#undef DEF_BUILTIN_BINOP_INT_FP +#undef DEF_BUILTIN +}; + +static GTY(()) tree gcn_builtin_decls[GCN_BUILTIN_MAX]; + +/* Return the GCN builtin for CODE. */ + +static tree +gcn_builtin_decl (unsigned code, bool ARG_UNUSED (initialize_p)) +{ + if (code >= GCN_BUILTIN_MAX) + return error_mark_node; + + return gcn_builtin_decls[code]; +} + +static void +gcn_init_builtin_types (void) +{ + gcn_builtin_types[GCN_BTI_VOID] = void_type_node; + exec_type_node = unsigned_intDI_type_node; + v64si_type_node = build_vector_type (intSI_type_node, 64); + v64sf_type_node = build_vector_type (float_type_node, 64); + v64ptr_type_node + = build_vector_type (unsigned_intDI_type_node + /*build_pointer_type (integer_type_node)*/, 64); + tree tmp = build_distinct_type_copy (intSI_type_node); + TYPE_ADDR_SPACE (tmp) = ADDR_SPACE_FLAT; + siptr_type_node = build_pointer_type (tmp); + + tmp = build_distinct_type_copy (float_type_node); + TYPE_ADDR_SPACE (tmp) = ADDR_SPACE_FLAT; + sfptr_type_node = build_pointer_type (tmp); +} + +/* Set up all builtin functions for this target. */ + +static void +gcn_init_builtins (void) +{ + gcn_init_builtin_types (); + + struct gcn_builtin_description *d; + unsigned int i; + for (i = 0, d = gcn_builtins; i < GCN_BUILTIN_MAX; i++, d++) + { + tree p; + char name[64]; /* build_function will make a copy. */ + int parm; + + /* FIXME: Is this necessary/useful? */ + if (d->name == 0) + continue; + + /* Find last parm. */ + for (parm = 1; d->parm[parm] != GCN_BTI_END_OF_PARAMS; parm++) + ; + + p = void_list_node; + while (parm > 1) + p = tree_cons (NULL_TREE, gcn_builtin_types[d->parm[--parm]], p); + + p = build_function_type (gcn_builtin_types[d->parm[0]], p); + + sprintf (name, "__builtin_gcn_%s", d->name); + gcn_builtin_decls[i] + = add_builtin_function (name, p, i, BUILT_IN_MD, NULL, NULL_TREE); + + /* These builtins don't throw. 
*/ + TREE_NOTHROW (gcn_builtin_decls[i]) = 1; + } +} + +static rtx +gcn_expand_builtin_1 (tree exp, rtx target, rtx /*subtarget*/, + machine_mode /*mode*/, int ignore, + struct gcn_builtin_description *) +{ + tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); + switch (DECL_FUNCTION_CODE (fndecl)) + { + case GCN_BUILTIN_FLAT_LOAD_INT32: + { + if (ignore) + return target; + /*rtx exec = */ + force_reg (DImode, + expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX, DImode, + EXPAND_NORMAL)); + /*rtx ptr =*/ + force_reg (V64DImode, + expand_expr (CALL_EXPR_ARG (exp, 1), NULL_RTX, V64DImode, + EXPAND_NORMAL)); + /*emit_insn (gen_vector_flat_loadv64si + (target, gcn_gen_undef (V64SImode), ptr, exec));*/ + return target; + } + case GCN_BUILTIN_FLAT_LOAD_PTR_INT32: + case GCN_BUILTIN_FLAT_LOAD_PTR_FLOAT: + { + if (ignore) + return target; + rtx exec + = force_reg (DImode, + expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX, DImode, + EXPAND_NORMAL)); + rtx ptr + = force_reg (DImode, + expand_expr (CALL_EXPR_ARG (exp, 1), NULL_RTX, V64DImode, + EXPAND_NORMAL)); + rtx offsets = force_reg (V64SImode, + expand_expr (CALL_EXPR_ARG (exp, 2), NULL_RTX, V64DImode, + EXPAND_NORMAL)); + rtx addrs = gen_reg_rtx (V64DImode); + rtx tmp = gen_reg_rtx (V64SImode); + emit_insn (gen_ashlv64si3_vector (tmp, offsets, + gcn_vec_constant (V64SImode, 2), + exec, gcn_gen_undef (V64SImode))); + emit_insn (gen_addv64di3_zext_dup2 (addrs, tmp, ptr, exec, + gcn_gen_undef (V64DImode))); + rtx mem = gen_rtx_MEM (GET_MODE (target), addrs); + set_mem_addr_space (mem, ADDR_SPACE_FLAT); + /* FIXME: set attributes. */ + emit_insn (gen_mov_with_exec (target, mem, exec)); + return target; + } + case GCN_BUILTIN_FLAT_STORE_PTR_INT32: + case GCN_BUILTIN_FLAT_STORE_PTR_FLOAT: + { + rtx exec + = force_reg (DImode, + expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX, DImode, + EXPAND_NORMAL)); + rtx ptr + = force_reg (DImode, + expand_expr (CALL_EXPR_ARG (exp, 1), NULL_RTX, V64DImode, + EXPAND_NORMAL)); + rtx offsets = force_reg (V64SImode, + expand_expr (CALL_EXPR_ARG (exp, 2), NULL_RTX, V64DImode, + EXPAND_NORMAL)); + enum machine_mode vmode = TYPE_MODE (TREE_TYPE (CALL_EXPR_ARG (exp, + 3))); + rtx val = force_reg (vmode, + expand_expr (CALL_EXPR_ARG (exp, 3), NULL_RTX, vmode, + EXPAND_NORMAL)); + rtx addrs = gen_reg_rtx (V64DImode); + rtx tmp = gen_reg_rtx (V64SImode); + emit_insn (gen_ashlv64si3_vector (tmp, offsets, + gcn_vec_constant (V64SImode, 2), + exec, gcn_gen_undef (V64SImode))); + emit_insn (gen_addv64di3_zext_dup2 (addrs, tmp, ptr, exec, + gcn_gen_undef (V64DImode))); + rtx mem = gen_rtx_MEM (vmode, addrs); + set_mem_addr_space (mem, ADDR_SPACE_FLAT); + /* FIXME: set attributes. */ + emit_insn (gen_mov_with_exec (mem, val, exec)); + return target; + } + default: + gcc_unreachable (); + } +} + +/* Expansion of simple arithmetic and bit binary operation bultins. 
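+
+   As an illustration, assuming gcn-builtins.def contains a binary entry
+   named "add", the declarations created above look roughly like
+
+     v64si __builtin_gcn_add_v64int (exec, v64si a, v64si b, v64si prev)
+     v64si __builtin_gcn_add_v64int_unspec (exec, v64si a, v64si b)
+
+   where EXEC is the unsigned DImode lane mask, PREV supplies the previous
+   contents of the destination for lanes masked out by EXEC, and the
+   "_unspec" form leaves those lanes undefined.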
*/ + +static rtx +gcn_expand_builtin_binop (tree exp, rtx target, rtx /*subtarget*/, + machine_mode /*mode*/, int ignore, + struct gcn_builtin_description *d) +{ + tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); + int icode = d->icode; + if (ignore) + return target; + + rtx exec = force_reg (DImode, + expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX, DImode, + EXPAND_NORMAL)); + + machine_mode m1 = insn_data[icode].operand[1].mode; + rtx arg1 = expand_expr (CALL_EXPR_ARG (exp, 1), NULL_RTX, m1, + EXPAND_NORMAL); + if (!insn_data[icode].operand[1].predicate (arg1, m1)) + arg1 = force_reg (m1, arg1); + + machine_mode m2 = insn_data[icode].operand[2].mode; + rtx arg2 = expand_expr (CALL_EXPR_ARG (exp, 2), NULL_RTX, m2, + EXPAND_NORMAL); + if (!insn_data[icode].operand[2].predicate (arg2, m2)) + arg2 = force_reg (m2, arg2); + + rtx arg_prev; + if (call_expr_nargs (exp) == 4) + { + machine_mode m_prev = insn_data[icode].operand[4].mode; + arg_prev = force_reg (m_prev, + expand_expr (CALL_EXPR_ARG (exp, 3), NULL_RTX, + m_prev, EXPAND_NORMAL)); + } + else + arg_prev = gcn_gen_undef (GET_MODE (target)); + + rtx pat = GEN_FCN (icode) (target, arg1, arg2, exec, arg_prev); + emit_insn (pat); + return target; +} + +/* Expand an expression EXP that calls a buVilt-in function, + with result going to TARGET if that's convenient + (and in mode MODE if that's convenient). + SUBTARGET may be used as the target for computing one of EXP's operands. + IGNORE is nonzero if the value is to be ignored. */ +rtx +gcn_expand_builtin (tree exp, + rtx target, + rtx subtarget, + machine_mode mode, + int ignore) +{ + tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); + unsigned int fcode = DECL_FUNCTION_CODE (fndecl); + struct gcn_builtin_description *d; + + gcc_assert (fcode < GCN_BUILTIN_MAX); + d = &gcn_builtins[fcode]; + + if (d->type == B_UNIMPLEMENTED) + sorry ("Builtin not implemented"); + + return d->expander (exp, target, subtarget, mode, ignore, d); +} + + +/* Implement TARGET_CLASS_LIKELY_SPILLED_P. */ + +static bool +gcn_class_likely_spilled_p (reg_class_t rclass) +{ + return rclass == EXEC_MASK_REG; +} + +/* Implement TARGET_PREFERRED_RELOAD_CLASS. + + Put float CONST_DOUBLE in the constant pool instead of fp regs. + QImode must go into class Q_REGS. + Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and + movdf to do mem-to-mem moves through integer regs. */ + +static reg_class_t +gcn_preferred_reload_class (rtx x, reg_class_t regclass) +{ + machine_mode mode = GET_MODE (x); + + /* We're only allowed to return a subclass of CLASS. Many of the + following checks fail for NO_REGS, so eliminate that early. */ + if (regclass == NO_REGS) + return NO_REGS; + + /* All classes can load zeros. */ + if (x == CONST0_RTX (mode)) + return regclass; + + if (regclass == EXEC_MASK_REG) + return SGPR_EXEC_REGS; + + return regclass; +} + +unsigned +gcn_frame_pointer_regnum () +{ + if (cfun && cfun->machine) + return cfun->machine->args.reg[PRIVATE_SEGMENT_WAVE_OFFSET_ARG]; + return 0; +} + +bool +gcn_can_eliminate_p (int /*from_reg*/, int to_reg) +{ + return ((unsigned) to_reg == HARD_FRAME_POINTER_REGNUM); +} + +/* Only vector registers can be read/written, be sure to tell IRA about this. + */ + +static reg_class_t +gcn_secondary_reload (bool, rtx, reg_class_t rclass, + machine_mode, secondary_reload_info *) +{ + if (rclass == VGPR_REGS) + return NO_REGS; + return VGPR_REGS; +} + +/* Update register usage after having seen the compiler flags. 
*/ + +static void +gcn_conditional_register_usage (void) +{ + int i; + for (i=0;i<16;i++) + fixed_regs[i] = !cfun || !cfun->machine ? 1 : 0; + if (!cfun || !cfun->machine) + return; + if (cfun->machine->args.reg[PRIVATE_SEGMENT_WAVE_OFFSET_ARG] >= 0) + fixed_regs[cfun->machine->args.reg[PRIVATE_SEGMENT_WAVE_OFFSET_ARG]] = 1; + if (cfun->machine->args.reg[PRIVATE_SEGMENT_BUFFER_ARG] >= 0) + { + fixed_regs[cfun->machine->args.reg[PRIVATE_SEGMENT_BUFFER_ARG]] = 1; + fixed_regs[cfun->machine->args.reg[PRIVATE_SEGMENT_BUFFER_ARG] + 1] = 1; + fixed_regs[cfun->machine->args.reg[PRIVATE_SEGMENT_BUFFER_ARG] + 2] = 1; + fixed_regs[cfun->machine->args.reg[PRIVATE_SEGMENT_BUFFER_ARG] + 3] = 1; + } +} +/* TARGET overrides. */ +#undef TARGET_FUNCTION_VALUE +#define TARGET_FUNCTION_VALUE gcn_function_value +#undef TARGET_STATIC_CHAIN +#define TARGET_STATIC_CHAIN gcn_static_chain +#undef TARGET_FUNCTION_VALUE_REGNO_P +#define TARGET_FUNCTION_VALUE_REGNO_P gcn_function_value_regno_p +#undef TARGET_FUNCTION_ARG_ADVANCE +#define TARGET_FUNCTION_ARG_ADVANCE gcn_function_arg_advance +#undef TARGET_FUNCTION_ARG +#define TARGET_FUNCTION_ARG gcn_function_arg +#undef TARGET_ASM_FILE_START +#define TARGET_ASM_FILE_START output_file_start +#undef TARGET_LRA_P +#define TARGET_LRA_P hook_bool_void_true +#undef TARGET_REGISTER_MOVE_COST +#define TARGET_REGISTER_MOVE_COST gcn_register_move_cost +#undef TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P +#define TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P \ + gcn_small_register_classes_for_mode_p +#undef TARGET_RTX_COSTS +#define TARGET_RTX_COSTS gcn_rtx_costs +#undef TARGET_MEMORY_MOVE_COST +#define TARGET_MEMORY_MOVE_COST gcn_memory_move_cost +#undef TARGET_SPILL_CLASS +#define TARGET_SPILL_CLASS gcn_spill_class +#undef TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS +#define TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS \ + gcn_ira_change_pseudo_allocno_class +#undef TARGET_SCALAR_MODE_SUPPORTED_P +#define TARGET_SCALAR_MODE_SUPPORTED_P gcn_scalar_mode_supported_p +#undef TARGET_VECTOR_MODE_SUPPORTED_P +#define TARGET_VECTOR_MODE_SUPPORTED_P gcn_vector_mode_supported_p +#undef TARGET_CLASS_MAX_NREGS +#define TARGET_CLASS_MAX_NREGS gcn_class_max_nregs +#undef TARGET_OPTION_OVERRIDE +#define TARGET_OPTION_OVERRIDE gcn_option_override +#undef TARGET_EXTRA_LIVE_ON_ENTRY +#define TARGET_EXTRA_LIVE_ON_ENTRY gcn_live_on_entry + +#undef TARGET_ATTRIBUTE_TABLE +#define TARGET_ATTRIBUTE_TABLE gcn_attribute_table + +#undef TARGET_ADDR_SPACE_POINTER_MODE +#define TARGET_ADDR_SPACE_POINTER_MODE gcn_addr_space_pointer_mode + +#undef TARGET_ADDR_SPACE_ADDRESS_MODE +#define TARGET_ADDR_SPACE_ADDRESS_MODE gcn_addr_space_address_mode + +#undef TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P +#define TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P \ + gcn_addr_space_legitimate_address_p + +#undef TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS +#define TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS gcn_addr_space_legitimize_address + +#undef TARGET_ADDR_SPACE_SUBSET_P +#define TARGET_ADDR_SPACE_SUBSET_P gcn_addr_space_subset_p + +#undef TARGET_ADDR_SPACE_CONVERT +#define TARGET_ADDR_SPACE_CONVERT gcn_addr_space_convert + +#undef TARGET_INIT_BUILTINS +#define TARGET_INIT_BUILTINS gcn_init_builtins +#undef TARGET_EXPAND_BUILTIN +#define TARGET_EXPAND_BUILTIN gcn_expand_builtin +#undef TARGET_BUILTIN_DECL +#define TARGET_BUILTIN_DECL gcn_builtin_decl +/*#undef TARGET_CLASS_LIKELY_SPILLED_P +#define TARGET_CLASS_LIKELY_SPILLED_P gcn_class_likely_spilled_p +#undef TARGET_PREFERRED_RELOAD_CLASS +#define TARGET_PREFERRED_RELOAD_CLASS gcn_preferred_reload_class 
+#undef TARGET_CAN_ELIMINATE +#define TARGET_CAN_ELIMINATE gcn_can_eliminate_p +#undef TARGET_SECONDARY_RELOAD +#define TARGET_SECONDARY_RELOAD gcn_secondary_reload*/ +#undef TARGET_CONDITIONAL_REGISTER_USAGE +#define TARGET_CONDITIONAL_REGISTER_USAGE gcn_conditional_register_usage + +struct gcc_target targetm = TARGET_INITIALIZER; + +#include "gt-gcn.h" diff --git a/gcc/config/gcn/gcn.h b/gcc/config/gcn/gcn.h new file mode 100644 index 00000000000..3b41095542c --- /dev/null +++ b/gcc/config/gcn/gcn.h @@ -0,0 +1,718 @@ +/* Copyright (C) 2016-2017 Free Software Foundation, Inc. + + This file is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your option) + any later version. + + This file is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. */ + +#include "config/gcn/gcn-opts.h" + + +/* FIXME */ +#define TARGET_CPU_CPP_BUILTINS() + +/* Temporarily disable libgcc until one actually exists. */ +#undef LIBGCC_SPEC +#define LIBGCC_SPEC "" + +/* Use LLVM assembler options. */ +#undef ASM_SPEC +#define ASM_SPEC "-triple=amdgcn--amdhsa %{march=*:-mcpu=%*} -filetype=obj" + +#undef LINK_SPEC +#define LINK_SPEC "" + +/* Support for a compile-time default architecture and tuning. The rules are: + --with-arch is ignored if -march is specified. + --with-tune is ignored if -mtune is specified. */ +#define OPTION_DEFAULT_SPECS \ + {"arch", "%{!march=*:-march=%(VALUE)}" }, \ + {"tune", "%{!mtune=*:-mtune=%(VALUE)}" } + +/* Default target_flags if no switches specified. */ +#ifndef TARGET_DEFAULT +#define TARGET_DEFAULT 0 +#endif + + +/* Storage Layout */ + +#define BITS_BIG_ENDIAN 0 + +#define BYTES_BIG_ENDIAN 0 + +#define WORDS_BIG_ENDIAN 0 + +#define BITS_PER_WORD 32 + +#define UNITS_PER_WORD (BITS_PER_WORD/BITS_PER_UNIT) + +#ifndef LIBGCC2_UNITS_PER_WORD +#define LIBGCC2_UNITS_PER_WORD 8 +#endif + +#define TARGET_64BIT 1 + +/* Scratch memory is addressed by buffered meomry accesses that are 32bit. */ +#define POINTER_SIZE 32 + +#define PARM_BOUNDARY 128 + +/* FIXME */ +#define STACK_BOUNDARY 128 + +#define FUNCTION_BOUNDARY 32 + +#define BIGGEST_ALIGNMENT 512 + +/* FIXME */ +#define DATA_ALIGNMENT(TYPE,ALIGN) ((ALIGN) > 128 ? (ALIGN) : 128) +#define CONSTANT_ALIGNMENT(TYPE,ALIGN) ((ALIGN) > 128 ? (ALIGN) : 128) +#define LOCAL_ALIGNMENT(TYPE,ALIGN) ((ALIGN) > 128 ? (ALIGN) : 128) + +/* FIXME */ +#define EMPTY_FIELD_BOUNDARY 32 + +#define STRICT_ALIGNMENT 1 + +/* FIXME */ +#define PCC_BITFIELD_TYPE_MATTERS 1 + +/* FIXME */ +#define MAX_FIXED_MODE_SIZE 128 + +#define STACK_SIZE_MODE DImode + + +/* Type Layout: match what x86-64 does. */ + +#define INT_TYPE_SIZE 32 + +#define LONG_TYPE_SIZE (TARGET_64BIT ? 64 : 32) + +#define LONG_LONG_TYPE_SIZE (TARGET_64BIT ? 128 : 64) + +#define FLOAT_TYPE_SIZE 32 + +#define DOUBLE_TYPE_SIZE 64 + +/* FIXME: software emulated? 
*/ +#define LONG_DOUBLE_TYPE_SIZE 64 + +#define DEFAULT_SIGNED_CHAR 1 + +/* Register Basics */ +#define FIRST_SGPR_REG 0 +#define LAST_SGPR_REG 101 + +#define FLAT_SCRATCH_REG 102 +#define FLAT_SCRATCH_LO_REG 102 +#define FLAT_SCRATCH_HI_REG 103 +#define XNACK_MASK_REG 104 +#define XNACK_MASK_LO_REG 104 +#define XNACK_MASK_HI_REG 105 +#define VCC_LO_REG 106 +#define VCC_HI_REG 107 +#define VCCZ_REG 108 +#define TBA_REG 109 +#define TBA_LO_REG 109 +#define TBA_HI_REG 110 +#define TMA_REG 111 +#define TMA_LO_REG 111 +#define TMA_HI_REG 112 +#define TTMP0_REG 113 +#define TTMP11_REG 124 +#define M0_REG 125 +#define EXEC_REG 126 +#define EXEC_LO_REG 126 +#define EXEC_HI_REG 127 +#define EXECZ_REG 128 +#define SCC_REG 129 +/* 132-159 is reserved; I am lazy to produce masks. */ + +#define FIRST_VGPR_REG 160 +#define LAST_VGPR_REG 415 + +#define VGPR_REGNO(N) ((N)+FIRST_VGPR_REG) +#define SGPR_REGNO(N) ((N)+FIRST_SGPR_REG) + +#define SGPR_OR_VGPR_REGNO_P(N) ((N)>=FIRST_VGPR_REG && (N) <= LAST_SGPR_REG) +#define SGPR_REGNO_P(N) ((N) <= LAST_SGPR_REG) +#define VGPR_REGNO_P(N) ((N)>=FIRST_VGPR_REG && (N) <= LAST_VGPR_REG) +#define CC_REG_P(X) (REG_P (X) && CC_REGNO_P (REGNO (X))) +#define CC_REGNO_P(X) ((X) == SCC_REG || (X) == VCC_REG) + +#define FIRST_PSEUDO_REGISTER 416 + +/* s[16:17] is fixed for exec hack moves. + Both registers can be probably eliminated and passed to regalloc for general + purpose with some magic. */ +#define FIXED_REGISTERS { \ + /* Scalars. */ \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, \ + /* Special regs and padding. */ \ +/* flat xnack vcc tba tma ttmp */ \ + 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, \ +/* m0 exec scc */ \ + 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, \ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \ + /* VGRPs */ \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ +} + +#define CALL_USED_REGISTERS { \ + /* Scalars. */ \ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \ + /* Special regs and padding. 
*/ \ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \ + /* VGRPs */ \ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \ +} + + +/* Values in Registers */ + +#define HARD_REGNO_NREGS(REGNO, MODE) gcn_hard_regno_nregs (REGNO, MODE) +#define HARD_REGNO_MODE_OK(REGNO, MODE) gcn_hard_regno_mode_ok (REGNO, MODE) +#define CANNOT_CHANGE_MODE_CLASS(FROM,TO,CLASS) gcn_cannot_change_mode_class (FROM, TO, CLASS) + +#define MODES_TIEABLE_P(MODE1, MODE2) \ + (GET_MODE_BITSIZE (MODE1) <= MAX_FIXED_MODE_SIZE \ + && GET_MODE_BITSIZE (MODE2) <= MAX_FIXED_MODE_SIZE) + +/* Register Classes */ + +enum reg_class { + NO_REGS, + + /* SCC */ + SCC_CONDITIONAL_REG, + + /* VCCZ */ + VCCZ_CONDITIONAL_REG, + + /* VCC */ + VCC_CONDITIONAL_REG, + + /* EXECZ */ + EXECZ_CONDITIONAL_REG, + + /* SCC VCCZ EXECZ */ + ALL_CONDITIONAL_REGS, + + /* EXEC */ + EXEC_MASK_REG, + + /* SGPR0-101 */ + SGPR_REGS, + + /* SGPR0-101 EXEC_LO/EXEC_HI */ + SGPR_EXEC_REGS, + + /* SGPR0-101, VCC LO/HI, TBA LO/HI, TMA LO/HI, TTMP0-11, M0, EXEC LO/HI, + VCCZ, EXECZ, SCC + FIXME: Maybe manual has bug and FLAT_SCRATCH is OK. 
*/ + SGPR_VOP3A_SRC_REGS, + + /* SGPR0-101, FLAT_SCRATCH_LO/HI, XNACK_MASK_LO/HI, VCC LO/HI, TBA LO/HI + TMA LO/HI, TTMP0-11 */ + SGPR_MEM_SRC_REGS, + + /* SGPR0-101, FLAT_SCRATCH_LO/HI, XNACK_MASK_LO/HI, VCC LO/HI, TBA LO/HI + TMA LO/HI, TTMP0-11, M0, EXEC LO/HI */ + SGPR_DST_REGS, + + /* SGPR0-101, FLAT_SCRATCH_LO/HI, XNACK_MASK_LO/HI, VCC LO/HI, TBA LO/HI + TMA LO/HI, TTMP0-11 */ + SGPR_SRC_REGS, + GENERAL_REGS, + VGPR_REGS, + SRCDST_REGS, + ALL_REGS, + LIM_REG_CLASSES +}; + +#define N_REG_CLASSES (int) LIM_REG_CLASSES + +#define REG_CLASS_NAMES \ +{ "NO_REGS", \ + "SCC_CONDITIONAL_REG", \ + "VCCZ_CONDITIONAL_REG", \ + "VCC_CONDITIONAL_REG", \ + "EXECZ_CONDITIONAL_REG", \ + "ALL_CONDITIONAL_REGS", \ + "EXEC_MASK_REG", \ + "SGPR_REGS", \ + "SGPR_EXEC_REGS", \ + "SGPR_VOP3A_SRC_REGS", \ + "SGPR_MEM_SRC_REGS", \ + "SGPR_SRC_REGS", \ + "SGPR_DST_REGS", \ + "GENERAL_REGS", \ + "VGPR_REGS", \ + "SRCDST_REGS", \ + "ALL_REGS" \ +} + +#define NAMED_REG_MASK(N) (1<<((N)-3*32)) +#define NAMED_REG_MASK2(N) (1<<((N)-4*32)) + +#define REG_CLASS_CONTENTS { \ + {0, 0, 0, 0, \ + 0, 0, 0, 0, \ + 0, 0, 0, 0, 0}, /* no regs */ \ + {0, 0, 0, 0, \ + NAMED_REG_MASK2 (SCC_REG), 0, 0, 0, \ + 0, 0, 0, 0}, /* scc reg */ \ + {0, 0, 0, \ + NAMED_REG_MASK (VCCZ_REG), 0, 0, 0, 0, \ + 0, 0, 0, 0, 0}, /* vccz reg */ \ + {0, 0, 0, \ + NAMED_REG_MASK (VCC_LO_REG)|NAMED_REG_MASK (VCC_HI_REG), 0, 0, 0, 0,\ + 0, 0, 0, 0, 0}, /* vccz reg */ \ + {0, 0, 0, 0, \ + NAMED_REG_MASK2 (EXECZ_REG), 0, 0, 0, \ + 0, 0, 0, 0}, /* execz reg */ \ + {0, 0, 0, \ + NAMED_REG_MASK (VCCZ_REG), \ + NAMED_REG_MASK (EXECZ_REG) | NAMED_REG_MASK2 (SCC_REG), \ + 0, 0, 0, \ + 0, 0, 0, 0, 0}, /* all conditional regs */ \ + {0, 0, 0, \ + NAMED_REG_MASK (EXEC_LO_REG) | NAMED_REG_MASK (EXEC_HI_REG),\ + 0, \ + 0, 0, 0, \ + 0, 0, 0, 0, 0}, /* exec mask reg */ \ + {0xffffffff, 0xffffffff, 0xffffffff, 0xf1, \ + 0, 0, 0, 0, \ + 0, 0, 0, 0, \ + 0}, /* sgpr regs*/ \ + {0xffffffff, 0xffffffff, 0xffffffff, 0xf1 \ + | NAMED_REG_MASK (EXEC_LO_REG) | NAMED_REG_MASK (EXEC_HI_REG),\ + 0, 0, 0, 0, \ + 0, 0, 0, 0, \ + 0}, /* sgpr exec regs*/ \ + {0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff \ + -NAMED_REG_MASK (FLAT_SCRATCH_LO_REG) \ + -NAMED_REG_MASK (FLAT_SCRATCH_HI_REG) \ + -NAMED_REG_MASK (XNACK_MASK_LO_REG) \ + -NAMED_REG_MASK (XNACK_MASK_HI_REG), \ + NAMED_REG_MASK (EXECZ_REG) | NAMED_REG_MASK2 (SCC_REG), \ + 0, 0, 0, \ + 0, 0, 0, 0, \ + 0}, /* sgpr vopra regs*/\ + {0xffffffff, 0xffffffff, 0xffffffff, \ + 0xffffffff-NAMED_REG_MASK (VCCZ_REG)-NAMED_REG_MASK (M0_REG)\ + -NAMED_REG_MASK (EXEC_LO_REG)-NAMED_REG_MASK (EXEC_HI_REG),\ + 0, 0, 0, 0, \ + 0, 0, 0, 0, \ + 0}, /* sgpr src mem regs*/\ + {0xffffffff, 0xffffffff, 0xffffffff, \ + 0xffffffff-NAMED_REG_MASK (VCCZ_REG), \ + 0, 0, 0, 0, \ + 0, 0, 0, 0, \ + 0}, /* sgpr dst regs*/\ + {0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, \ + NAMED_REG_MASK2 (EXECZ_REG) | NAMED_REG_MASK2 (SCC_REG), \ + 0, 0, 0, \ + 0, 0, 0, 0, \ + 0}, /* sgpr src regs*/\ + {0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, \ + 0xffffffff, 0, 0, 0, \ + 0, 0, 0, 0, \ + 0}, /* general regs*/ \ + {0, 0, 0, 0, \ + 0, 0xffffffff, 0xffffffff, 0xffffffff, \ + 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, \ + 0xffffffff}, /* vector regs */ \ + {0xffffffff, 0xffffffff, 0xffffffff, \ + 0xffffffff-NAMED_REG_MASK (VCCZ_REG), \ + 0, 0xffffffff, 0xffffffff, 0xffffffff, \ + 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, \ + 0xffffffff}, /* srcdst regs*/\ + {0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, \ + 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 
\ + 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, \ + 0xffffffff}} /* all regs*/ + +#define REGNO_REG_CLASS(REGNO) gcn_regno_reg_class (REGNO) +#define MODE_CODE_BASE_REG_CLASS(MODE, AS, OUTER, INDEX) \ + gcn_mode_code_base_reg_class (MODE, AS, OUTER, INDEX) +#define REGNO_MODE_CODE_OK_FOR_BASE_P(NUM, MODE, AS, OUTER, INDEX) \ + gcn_regno_mode_code_ok_for_base_p (NUM, MODE, AS, OUTER, INDEX) +#define INDEX_REG_CLASS VGPR_REGS +#define REGNO_OK_FOR_INDEX_P(regno) regno_ok_for_index_p (regno) + +/* Forward declaration so the following compiles. */ +extern short *reg_renumber; + +#define INT_REG_OK_FOR_INDEX_P(X, STRICT) \ + ((!(STRICT) || REGNO_OK_FOR_INDEX_P (REGNO (X)))) +#define INT_REG_OK_FOR_BASE_P(X, STRICT) \ + ((!(STRICT) || REGNO_OK_FOR_BASE_P (REGNO (X)))) + +/* Address spaces. */ +enum gcn_address_spaces { + ADDR_SPACE_SCRATCH = 0, + ADDR_SPACE_FLAT, + ADDR_SPACE_SCALAR_FLAT, + ADDR_SPACE_LDS, + ADDR_SPACE_GDS +}; +#define REGISTER_TARGET_PRAGMAS() do { \ +c_register_addr_space ("__flat", ADDR_SPACE_FLAT); \ +c_register_addr_space ("__scalar_flat", ADDR_SPACE_SCALAR_FLAT); \ +c_register_addr_space ("__lds", ADDR_SPACE_LDS); \ +c_register_addr_space ("__gds", ADDR_SPACE_LDS); \ +}while (0); + +/* Sections */ +#define GLOBAL_ASM_OP "\t.globl\t" + +/* File Framework */ +#undef ASM_DECLARE_FUNCTION_NAME +#define ASM_DECLARE_FUNCTION_NAME(FILE, NAME, DECL) \ + gcn_hsa_declare_function_name ((FILE), (NAME), (DECL)) + +#define ASM_APP_ON "" + +#define ASM_APP_OFF "" + + +/* Uninitialized Data */ +#define ASM_OUTPUT_COMMON(FILE, NAME, SIZE, ROUNDED) \ +( fputs (".comm ", (FILE)), \ + assemble_name ((FILE), (NAME)), \ + fprintf ((FILE), ",%d\n", (ROUNDED))) + +#define ASM_OUTPUT_LOCAL(FILE, NAME, SIZE, ROUNDED) \ +( fputs (".lcomm ", (FILE)), \ + assemble_name ((FILE), (NAME)), \ + fprintf ((FILE), ",%d\n", (ROUNDED))) + + +/* Label Output */ +#define ASM_OUTPUT_LABEL(FILE,NAME) \ + do { assemble_name (FILE, NAME); fputs (":\n", FILE); } while (0) + +#define ASM_OUTPUT_LABELREF(FILE, NAME) \ + asm_fprintf (FILE, "%U%s", default_strip_name_encoding (NAME)) + +#define ASM_OUTPUT_SYMBOL_REF(FILE, X) \ + do \ + { \ + tree decl; \ + assemble_name (FILE, XSTR ((X), 0)); \ + if ((decl = SYMBOL_REF_DECL ((X))) != 0 \ + && TREE_CODE (decl) == VAR_DECL \ + && TYPE_ADDR_SPACE (TREE_TYPE (decl))) \ + fputs ("@ppu", FILE); \ + } while (0) + + +/* Instruction Output */ +#define REGISTER_NAMES \ + {"s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7", "s8", "s9", \ + "s10", "s11", "s12", "s13", "s14", "s15", "s16", "s17", "s18", "s19", \ + "s20", "s21", "s22", "s23", "s24", "s25", "s26", "s27", "s28", "s29", \ + "s30", "s31", "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", \ + "s40", "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49", \ + "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58", "s59", \ + "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67", "s68", "s69", \ + "s70", "s71", "s72", "s73", "s74", "s75", "s76", "s77", "s78", "s79", \ + "s80", "s81", "s82", "s83", "s84", "s85", "s86", "s87", "s88", "s89", \ + "s90", "s91", "s92", "s93", "s94", "s95", "s96", "s97", "s98", "s99", \ + "s100", "s101", \ + "flat_scratch_lo", "flat_scratch_hi", "xnack_mask_lo", "xnack_mask_hi", \ + "vcc_lo", "vcc_hi", "vccz", "tba_lo", "tba_hi", "tma_lo", "tma_hi", \ + "ttmp0", "ttmp1", "ttmp2", "ttmp3", "ttmp4", "ttmp5", "ttmp6", "ttmp7", \ + "ttmp8", "ttmp9", "ttmp10", "ttmp11", "m0", "exec_lo", "exec_hi", "execz", \ + "scc", "res130", "res131", "res132", "res133", "res134", "res135", \ + 
"res136", "res137", "res138", "res139", \ + "res140", "res141", "res142", "res143", "res144", "res145", "res146", \ + "res147", "res148", "res149", \ + "res150", "res151", "res152", "res153", "res154", "res155", "res156", \ + "res157", "res158", "res159", \ + "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", \ + "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", \ + "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", \ + "v30", "v31", "v32", "v33", "v34", "v35", "v36", "v37", "v38", "v39", \ + "v40", "v41", "v42", "v43", "v44", "v45", "v46", "v47", "v48", "v49", \ + "v50", "v51", "v52", "v53", "v54", "v55", "v56", "v57", "v58", "v59", \ + "v60", "v61", "v62", "v63", "v64", "v65", "v66", "v67", "v68", "v69", \ + "v70", "v71", "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", \ + "v80", "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89", \ + "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98", "v99", \ + "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107", "v108", \ + "v109", \ + "v110", "v111", "v112", "v113", "v114", "v115", "v116", "v117", "v118", \ + "v119", \ + "v120", "v121", "v122", "v123", "v124", "v125", "v126", "v127", "v128", \ + "v129", \ + "v130", "v131", "v132", "v133", "v134", "v135", "v136", "v137", "v138", \ + "v139", \ + "v140", "v141", "v142", "v143", "v144", "v145", "v146", "v147", "v148", \ + "v149", \ + "v150", "v151", "v152", "v153", "v154", "v155", "v156", "v157", "v158", \ + "v159", \ + "v160", "v161", "v162", "v163", "v164", "v165", "v166", "v167", "v168", \ + "v169", \ + "v170", "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", \ + "v179", \ + "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188", \ + "v189", \ + "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197", "v198", \ + "v199", \ + "v200", "v201", "v202", "v203", "v204", "v205", "v206", "v207", "v208", \ + "v209", \ + "v210", "v211", "v212", "v213", "v214", "v215", "v216", "v217", "v218", \ + "v219", \ + "v220", "v221", "v222", "v223", "v224", "v225", "v226", "v227", "v228", \ + "v229", \ + "v230", "v231", "v232", "v233", "v234", "v235", "v236", "v237", "v238", \ + "v239", \ + "v240", "v241", "v242", "v243", "v244", "v245", "v246", "v247", "v248", \ + "v249", \ + "v250", "v251", "v252", "v253", "v254", "v255", \ + } + +#define PRINT_OPERAND(FILE, X, CODE) print_operand(FILE, X, CODE) + +#define PRINT_OPERAND_ADDRESS(FILE, ADDR) print_operand_address (FILE, ADDR) + +#define LOCAL_LABEL_PREFIX "." + +#define USER_LABEL_PREFIX "" + +#define ASM_COMMENT_START "#" + + +/* Dispatch Tables */ + +#define ASM_OUTPUT_ADDR_DIFF_ELT(FILE, BODY, VALUE, REL) \ + fprintf (FILE, "\t.word .L%d-.L%d\n", VALUE, REL) + +#define ASM_OUTPUT_ADDR_VEC_ELT(FILE, VALUE) \ + fprintf (FILE, "\t.word .L%d\n", VALUE) + + +/* Alignment Output */ + +#define ASM_OUTPUT_ALIGN(FILE,LOG) \ + do { if (LOG!=0) fprintf (FILE, "\t.align\t%d\n", 1<<(LOG)); } while (0) + + +/* Misc */ + +#define CASE_VECTOR_MODE DImode + +#define Pmode SImode + +#define FUNCTION_MODE QImode + +/* Frame Registers, and other registers */ + +#define STACK_POINTER_REGNUM FLAT_SCRATCH_REG + +/* FIXME: The following declaration is also in gen-protos.h. Either remove it + from there or from here if it is not necessary in the following macro. */ +extern unsigned gcn_frame_pointer_regnum (); +/* FIXME. 
*/ +#define HARD_FRAME_POINTER_REGNUM gcn_frame_pointer_regnum () + +#define FRAME_POINTER_REGNUM FLAT_SCRATCH_REG + +#define HARD_FRAME_POINTER_IS_ARG_POINTER false +#define HARD_FRAME_POINTER_IS_FRAME_POINTER false +/* There is no arg pointer. Just choose random fixed register that does + not intefere with anything. */ +#define ARG_POINTER_REGNUM FLAT_SCRATCH_REG +/* FIXME. */ +#define FUNCTION_ARG_REGNO_P(N) 0 + +/* Frame Layout all FIXME */ + +#define STACK_GROWS_DOWNWARD 0 + +#define FRAME_GROWS_DOWNWARD 0 + +#define STARTING_FRAME_OFFSET (0) + +#define STACK_POINTER_OFFSET 32 + +#define FIRST_PARM_OFFSET(FNDECL) (0) + +#define DYNAMIC_CHAIN_ADDRESS(FP) plus_constant (Pmode, (FP), -16) + +#define INITIAL_FRAME_POINTER_OFFSET(N) (N) + +/* Register Arguments */ + +#define GCN_KERNEL_ARG_TYPES 19 + +struct GTY(()) gcn_kernel_args +{ + long requested; + int reg[GCN_KERNEL_ARG_TYPES]; + int order[GCN_KERNEL_ARG_TYPES]; + int nargs, nsgprs; +}; + +typedef struct gcn_args { + struct gcn_kernel_args args; + int num; + int offset; + int alignment; +} CUMULATIVE_ARGS; + +#define INIT_CUMULATIVE_ARGS(CUM,FNTYPE,LIBNAME,FNDECL,N_NAMED_ARGS) \ + gcn_init_cumulative_args (&(CUM), (FNTYPE), (LIBNAME), (FNDECL), \ + (N_NAMED_ARGS) != -1) + +/* Address spaces. */ + +#define MAX_REGS_PER_ADDRESS 2 + +/* Profiling */ + +#define FUNCTION_PROFILER(FILE, LABELNO) + +#define NO_PROFILE_COUNTERS 1 + +#define PROFILE_BEFORE_PROLOGUE 0 + +/* Trampolines */ +/*FIXME*/ +#define TRAMPOLINE_SIZE (65) +/*FIXME*/ +#define TRAMPOLINE_ALIGNMENT 64 + +/* Misc */ + +#define MOVE_MAX 16 + +#define TRULY_NOOP_TRUNCATION(OUTPREC, INPREC) ((INPREC) <= 32 \ + && (OUTPREC) <= (INPREC)) + +struct GTY(()) machine_function +{ + /* Register holding default value of EXEC. */ + rtx exec_reg; + bool exec_reg_init; + struct gcn_kernel_args args; + int kernarg_segment_alignment; + int kernarg_segment_byte_size; +}; + +/* Definitions for register eliminations. + + This is an array of structures. Each structure initializes one pair + of eliminable registers. The "from" register number is given first, + followed by "to". Eliminations of the same "from" register are listed + in order of preference. + + There are two registers that can always be eliminated on the i386. + The frame pointer and the arg pointer can be replaced by either the + hard frame pointer or to the stack pointer, depending upon the + circumstances. The hard frame pointer is not used before reload and + so it is not eligible for elimination. */ + +#define ELIMINABLE_REGS \ +{{ FRAME_POINTER_REGNUM, 0}, \ + { FRAME_POINTER_REGNUM, 1}, \ + { FRAME_POINTER_REGNUM, 2}, \ + { FRAME_POINTER_REGNUM, 3}, \ + { FRAME_POINTER_REGNUM, 4}, \ + { FRAME_POINTER_REGNUM, 5}, \ + { FRAME_POINTER_REGNUM, 6}, \ + { FRAME_POINTER_REGNUM, 7}, \ + { FRAME_POINTER_REGNUM, 8}, \ + { FRAME_POINTER_REGNUM, 9}, \ + { FRAME_POINTER_REGNUM, 10}, \ + { FRAME_POINTER_REGNUM, 11}, \ + { FRAME_POINTER_REGNUM, 12}, \ + { FRAME_POINTER_REGNUM, 13}, \ + { FRAME_POINTER_REGNUM, 14}, \ + { FRAME_POINTER_REGNUM, 15}} \ + +/* Define the offset between two registers, one to be eliminated, and the other + its replacement, at the start of a routine. */ + +#define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET) ((OFFSET) = 0) + +#define SLOW_BYTE_ACCESS 0 + +/* Define this macro if it is advisable to hold scalars in registers + in a wider mode than that declared by the program. In such cases, + the value is constrained to be within the bounds of the declared + type, but kept valid in the wider mode. 
The signedness of the + extension may differ from that of the type. */ + +#define PROMOTE_MODE(MODE,UNSIGNEDP,TYPE) \ + if (GET_MODE_CLASS (MODE) == MODE_INT \ + && (TYPE == NULL || TREE_CODE (TYPE) != VECTOR_TYPE) \ + && GET_MODE_SIZE (MODE) < UNITS_PER_WORD) \ + { \ + (MODE) = SImode; \ + } diff --git a/gcc/config/gcn/gcn.md b/gcc/config/gcn/gcn.md new file mode 100644 index 00000000000..b9d8a906702 --- /dev/null +++ b/gcc/config/gcn/gcn.md @@ -0,0 +1,1068 @@ +;; Copyright (C) 2016-2017 Free Software Foundation, Inc. + +;; This file is free software; you can redistribute it and/or modify it under +;; the terms of the GNU General Public License as published by the Free +;; Software Foundation; either version 3 of the License, or (at your option) +;; any later version. + +;; This file is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +;; for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. + +;;- See file "rtl.def" for documentation on define_insn, match_*, et. al. + +;; ------------------------------------------------------------------------- +;; Constants and enums +;; ------------------------------------------------------------------------- + +; Named registers +(define_constants + [(FIRST_SGPR_REG 0) + (LAST_SGPR_REG 101) + (FLAT_SCRATCH_REG 102) + (FLAT_SCRATCH_LO_REG 102) + (FLAT_SCRATCH_HI_REG 103) + (XNACK_MASK_REG 104) + (XNACK_MASK_LO_REG 104) + (XNACK_MASK_HI_REG 105) + (VCC_REG 106) + (VCC_LO_REG 106) + (VCC_HI_REG 107) + (VCCZ_REG 108) + (TBA_REG 109) + (TBA_LO_REG 109) + (TBA_HI_REG 110) + (TMA_REG 111) + (TMA_LO_REG 111) + (TMA_HI_REG 112) + (TTMP0_REG 113) + (TTMP11_REG 124) + (M0_REG 125) + (EXEC_REG 126) + (EXEC_LO_REG 126) + (EXEC_HI_REG 127) + (EXECZ_REG 128) + (SCC_REG 129) +; Lazyness gap of unused hard regs. + (FIRST_VGPR_REG 160) + (LAST_VGPR_REG 415)] + ) + +; Named unspec values +(define_c_enum "unspecv" [ + UNSPECV_PROLOGUE_USE]) + +(define_c_enum "unspec" [ + UNSPEC_VECTOR]) + +;; ------------------------------------------------------------------------- +;; Attributes +;; ------------------------------------------------------------------------- + +; Every instructions should set MODE and TYPE. Other attributes should be +; more or less determined by their conditionals. + +; Main data type used by the insn +(define_attr "mode" + "unknown,none,BI,QI,HI,SI,DI,TI,HF,SF,DF,V2SF,V2SI,V3DI,V64SI,V64DI,V64SF, + V64DF,V4SF,V2DF,V3SF,V4SI,V3SI,V2DI,V64HF,V64HI,V64QI" + (const_string "unknown")) + +; Instruction type (encoding) as described the specification. +; The following table summarizes possible operands of individual instruction +; types and corresponding constraints. +; +; sop2 - scalar, two inputs, one output +; ssrc0/ssrc1: sgpr 0-102; flat_scratch,xnack,vcc,tba,tma,ttmp0-11,exec +; vccz,execz,scc,inline immedate,fp inline immediate +; sdst: sgpr 0-102; flat_scratch,xnack,vcc,tba,tma,ttmp0-11,exec +; +; Constraints "=SD, SD", "SSA,SSB","SSB,SSA" +; +; sopk - scalar, inline constant input, one output +; simm16: 16bit inline constant +; sdst: same as sop2/ssrc0 +; +; Constraints "=SD", "J" +; +; sop1 - scalar, one input, one output +; ssrc0: same as sop2/ssrc0. 
FIXME: manual omit VCCZ +; sdst: same as sop2/sdst +; +; Constraints "=SD", "SSA" +; +; sopc - scalar, two inputs, one comparsion +; ssrc0: same as sop2/ssc0. +; +; Constraints "SSI,SSA","SSA,SSI" +; +; sopp - scalar, one constant input, one special +; simm16 +; +; smem - scalar memory +; sbase: aligned pair of sgprs. Specify {size[15:0], base[47:0]} in +; dwords +; sdata: sgpr0-102, flat_scratch, xnack, vcc, tba, tma +; offset: sgpr or 20bit unsigned byte offset +; +; vop2 - vector, two inputs, one output +; vsrc0: sgpr0-102,flat_scratch,xnack,vcc,tba,ttmp0-11,m0,exec, +; inline constant -16 to -64, fp inline immediate, vccz, execz, +; scc, lds, literal constant, vgpr0-255 +; vsrc1: vgpr0-255 +; vdst: vgpr0-255 +; Limitations: At most one SGPR, at most one constant +; if constant is used, SGPR must be M0 +; Only SRC0 can be LDS_DIRECT +; +; constraints: "=v", "vBSS", "v" +; +; vop1 - vector, one input, one output +; vsrc0: same as vop2/src0 +; vdst: vgpr0-255 +; +; constraints: "=v", "vBSS" +; +; vopc - vector, two inputs, one comparsion output; +; vsrc0: same as vop2/src0 +; vsrc1: vgpr0-255 +; vdst: +; +; constraints: "vASS", "v" +; +; vop3a - vector, three inputs, one output +; vdst: vgpr0-255, for v_cmp sgpr or vcc +; abs,clamp +; vsrc0: sgpr0-102,vcc,tba,ttmp0-11,m0,exec, +; inline constant -16 to -64, fp inline immediate, vccz, execz, +; scc, lds_direct +; FIXME: really missing 1/pi? really 104 SGPRs +; +; vop3b - vector, three inputs, one vector output, one scalar output +; vsrc0,vsrc1,vsrc2: same as vop3a vsrc0 +; vdst: vgpr0-255 +; sdst: sgpr0-103/vcc/tba/tma/ttmp0-11 +; +; +; mult is for insn representing multiple instructions, vmult is for insn +; representing multiple instruction that include vector + +(define_attr "type" + "unknown,sop1,sop2,sopk,sopc,sopp,smem,dsmem,vop2,vop1,vopc, + vop3a,vop3b,vintr,lds,mubuf,mtbuf,exp,flat,mult,vmult" + (const_string "unknown")) + +; Set if instruction is executed in scalar or vector unit + +(define_attr "unit" "unknown,scalar,vector" + (cond [(eq_attr "type" "sop1,sop2,sopk,sopc,sopp,smem,mult") + (const_string "scalar") + (eq_attr "type" "vop2,vop1,vopc,vop3a,vop3b,vintr, + mubuf,mtbuf,flat,vmult") + (const_string "vector")] + (const_string "unknown"))) + +; All vector instructions runs in 64bit threads which as predicated by EXEC +; registers. EXEC register is assumed to be non-zero first time program starts. +; Because scalar operations are often also offloaded to the vector unit, we +; use modes switching to model three states of EXEC register: +; - any: Instruction do not care +; - init: Instruction must be executed with exec passed to function +; - subinit: Instruction must be executed with non-zero EXEC which is subset +; of what was passed to function +; - full: Instruction expect exec to be all ones (for full sized vector ops) + +(define_attr "exec" "any,unknown,init,subinit,user,full" + (cond [(eq_attr "unit" "scalar") + (const_string "any") + (eq_attr "mode" "none,BI,QI,HI,SI,DI,TI,HF,SF,DF,V2SF,V3DI,V4SF, + V2DF,V3SF,V4SI,V2DI,V3SI") + (const_string "init") + (eq_attr "mode" "V64SI,V64DI,V64SF,V64DF,V64HF,V64HI,V64QI") + (const_string "full")] + (const_string "unknown"))) + +;; ------------------------------------------------------------------------- +;; Iterators useful across the wole machine description +;; ------------------------------------------------------------------------- + +; Scalar registers can generally be operated in SI or DI. 
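+; A pattern written over such an iterator is instantiated once per listed
+; mode; for example the plus/minus splitters further down use SIDI_MODE to
+; cover both the SImode and the DImode case from a single template.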
+(define_mode_iterator SIDI_MODE [SI DI]) +; Double reg vector operations +(define_mode_iterator V64_INT_MODE [DI V64DI]) +; Default pointer is either 32bit or 64bit +;(define_mode_iterator P [(SI "Pmode == SImode") (DI "Pmode == DImode")]) +;(define_mode_iterator V64P [(V64SI "Pmode == SImode") (V64DI "Pmode == DImode")]) + +(include "predicates.md") +(include "constraints.md") + +;; ------------------------------------------------------------------------- +;; Attibutes. +;; ------------------------------------------------------------------------- + +; Translate RTX code into GCN instruction mnemonics +(define_code_attr mnemonic + [(minus "sub%i") + (plus "add%i") + (ashift "lshl%b") + (lshiftrt "lshr%b") + (ashiftrt "ashr%i") + (and "and%b") + (ior "or%b") + (xor "xor%b") + (mult "mul%i") + (smin "min%i") + (smax "max%i") + (umin "min%u") + (umax "max%u")]) + +(define_code_attr revmnemonic + [(minus "subrev%i") + (ashift "lshlrev%b") + (lshiftrt "lshrrev%b") + (ashiftrt "ashrrev%i")]) + +; Translate RTX code into corresponding expander name. +(define_code_attr expander + [(and "and") + (ior "ior") + (xor "xor") + (plus "add") + (minus "sub") + (ashift "ashl") + (lshiftrt "lshr") + (ashiftrt "ashr") + (mult "mul") + (smin "smin") + (smax "smax") + (umin "umin") + (umax "umax")]) + +(define_mode_attr native_mode + [(V64DI "V64SI") (DI "SI")]) +;; ------------------------------------------------------------------------- +;; nop instruction +;; ------------------------------------------------------------------------- + +(define_insn "nop" + [(const_int 0)] + "" + "s_nop\t0x") + +;; ------------------------------------------------------------------------- +;; Trap +;; ------------------------------------------------------------------------- + +(define_insn "trap" + [(trap_if (const_int 1) (const_int 0))] + "" + "s_trap") + +;; ------------------------------------------------------------------------- +;; Moves +;; ------------------------------------------------------------------------- + +; All modes GCN support move operation in +(define_mode_iterator S_MOV_MODE [BI SI DI SF DF V2SI V2SF V64SI V64DI]) +; All modes GCN support move in single vector or scalar reg +(define_mode_iterator S_MOV1_MODE [BI SI SF]) +; All modes GCN support move in pair of vector or scalar regs +(define_mode_iterator S_MOV2_MODE [DI DF]) +; Loads and sctores can do 3,4,8,16 double words. +(define_mode_iterator S_MOV3_MODE [V3SI V3SF]) +(define_mode_iterator S_MOV4_MODE [TI V4SI V4SF V2DI V2DF]) +(define_mode_iterator S_MOV34_MODE [V3SI V3SF TI V4SI V4SF V2DI V2DF]) + +;; All modes we support moves in. 
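+; The mov<mode> expander below goes through gcn_expand_mov; a simple SImode
+; register copy then comes out as, e.g., "s_mov_b32 s0, s1" on the scalar
+; side or "v_mov_b32 v0, v1" on the vector side (register numbers purely
+; illustrative).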
+(define_mode_iterator ES_MOV_MODE [BI QI HI SI DI SF DF V2SI V2SF V64SI V64DI]) +(define_expand "mov<mode>" + [(set (match_operand:ES_MOV_MODE 0 "gcn_simple_mem_or_reg_operand") + (match_operand:ES_MOV_MODE 1 "gcn_load_operand"))] + "" +{ + if (gcn_expand_mov(<MODE>mode, operands[0], operands[1])) + DONE; +}) + +; We need BImode move so we can reload flags registers + +;(define_insn "*movbi" + ;[(set (match_operand:BI 0 "register_operand" "=SD ,v") + ;;(match_operand:BI 1 "nonmemory_operand" "SSA,vSSA"))] + ;"" + ;"@ + ;s_mov_b32\t%0, %1 + ;v_mov_b32\t%0, %1" + ;[(set_attr "type" "sop1,vop1") + ;(set_attr "mode" "SI")]) + +; 32bit move pattern + +; FIXME: Ducumentation describe s_store_dword, but it is not accepted by +; LLVM AS +(define_insn "*mov<mode>_scalar" + [(set (match_operand:S_MOV1_MODE 0 "register_operand" "=SD,SD, SD,Sm,v, Sm") + (match_operand:S_MOV1_MODE 1 "gcn_load_operand" "SSA,SSJ,B, RS,SS,v"))] + "" + "@ + s_mov_b32\t%0, %1 + s_movk_i32\t%0, %1 + s_mov_b32\t%0, %1 + s_load_dword\t%0, %A1\n\ts_waitcnt\tlgkmcnt(0) + v_writelane_b32\t%0, %1, 0 + v_readlane_b32\t%0, %1, 0" + [(set_attr "type" "sop1,sopk,sop1,smem,vop3a,vop3a") + (set_attr "mode" "SI")]) + +; FIXME: Why readfirstlane has both VOP1 and VOP3 encoding? +(define_insn_and_split "*mov<mode>" + [(set (match_operand:S_MOV1_MODE 0 "nonimmediate_operand" "=SD, SD, SD,^Sm,v, v, Sm,^v,^Sm,RD,RF,v ,v ") + (match_operand:S_MOV1_MODE 1 "gcn_load_operand" " SSA,SSJ,B, RS,vB,SS,v, SS,v ,v ,v ,RD,RF")) + (use (match_operand:DI 2 "gcn_exec_operand" " n, n, n, n, e, e, e, O, O ,e ,e ,e ,e "))] + "(register_operand (operands[0], VOIDmode) || register_operand (operands[1], VOIDmode)) + && (REG_P (operands[2]) || !gcn_vgpr_move_p (operands[0], operands[1]))" + "@ + # + # + # + # + v_mov_b32\t%0, %1 + v_mov_b32\t%0, %1 + v_readfirstlane_b32\t%0, %1 + # + # + ds_write_b32\t%A0, %1%O0 + flat_store%s0\t%A0, %1 + ds_read_b32\t%0, %A1%O1 + flat_load_dword\t%0, %A1\n\ts_waitcnt\tlgkmcnt(0),vmcnt(0)" + "gcn_sgpr_move_p (operands[0], operands[1]) + || (reload_completed && GET_CODE (operands[2]) == CONST_INT)" + [(set (match_dup 0) (match_dup 1))] +{} + [(set_attr "type" "sop1,sopk,sop1,smem,vop1,vop1,vop1,vop3a,vop3a,dsmem,flat,dsmem,flat") + (set_attr "mode" "SI")]) + +(define_insn_and_split "*mov<mode>_scalar" + [(set (match_operand:S_MOV2_MODE 0 "register_operand" "=SD,SD,Sm,v, Sm") + (match_operand:S_MOV2_MODE 1 "general_operand" "SSB,SSn,RS,SS,v"))] + "" + "@ + s_mov%b0\t%0, %1 + # + s_load%s0\t%0, %A1\n\ts_waitcnt\tlgkmcnt(0) + # + #" + "(reload_completed && !gcn_sgpr_move_p (operands[0], operands[1])) + || (GET_CODE (operands[1]) == CONST_INT && !gcn_constant_p (operands[1]))" + [(set (match_dup 0) (match_dup 1)) + (set (match_dup 2) (match_dup 3))] +{ + gcn_split_operands (<MODE>mode, operands, 2, 2); +} + [(set_attr "type" "sop1,mult,smem,vmult,vmult") + (set_attr "mode" "DI,SI,DI,SI,SI")]) + +(define_insn_and_split "*mov<mode>_scalar" + [(set (match_operand:S_MOV2_MODE 0 "nonimmediate_operand" "=SD,SD,Sm,v, v ,^v, ^Sm,RD,RF,v ,v ") + (match_operand:S_MOV2_MODE 1 "general_operand" "SSB,SSn,RS,vn,SS, SS,v ,v ,v ,RD,RF")) + (use (match_operand:DI 2 "gcn_exec_operand" "n,n,n,e,e,O,O,e,e,e,e"))] + "(register_operand (operands[0], VOIDmode) || register_operand (operands[1], VOIDmode)) + && (REG_P (operands[2]) || !gcn_vgpr_move_p (operands[0], operands[1]))" + "@ + # + # + # + # + # + # + # + ds_write_b64\t%A0, %1%O0 + flat_store%s0\t%A0, %1 + ds_read_b64\t%0, %A1%O1 + flat_load_dword\t%0, %A1\n\ts_waitcnt\tlgkmcnt(0),vmcnt(0)" + 
"gcn_sgpr_move_p (operands[0], operands[1]) + || (reload_completed && GET_CODE (operands[2]) == CONST_INT)" + [(set (match_dup 0) (match_dup 1))] +{} + [(set_attr "type" "sop1,mult,smem,vmult,vmult,vmult,vmult,dsmem,flat,dsmem,flat") + (set_attr "mode" "DI,SI,DI,SI,SI,SI,SI,DI,DI,DI,DI")]) + + +; Split to move by pieces but be sure that we do not split s_mov_b64 +(define_split + [(set (match_operand:S_MOV2_MODE 0 "register_operand") + (match_operand:S_MOV2_MODE 1 "nonmemory_operand")) + (use (match_operand:DI 2 "register_operand"))] + "gcn_vgpr_register_operand (operands[0], VOIDmode) + || gcn_vgpr_register_operand (operands[1], VOIDmode)" + [(parallel [(set (match_dup 0) (match_dup 1)) (use (match_dup 4))]) + (parallel [(set (match_dup 2) (match_dup 3)) (use (match_dup 4))])] +{ + operands[4] = operands[2]; + gcn_split_operands (<MODE>mode, operands, 2, 2); +}) + +(define_insn "*mov<mode>" + [(set (match_operand:S_MOV34_MODE 0 "register_operand" "=SD,SD, SD,Sm,v, ^v, ^Sm") + (match_operand:S_MOV34_MODE 1 "gcn_load_operand" "SSA,SSJ,B, RS,vB, SS, v")) + (use (match_operand:DI 2 "gcn_exec_operand" "n,n,n,n,e,e,n"))] + "" + "@ + # + # + # + s_load%s0\t%0, %A1\n\ts_waitcnt\tlgkmcnt(0) + # + # + #" + [(set_attr "type" "mult,mult,mult,smem,vmult,vmult,vmult") + (set_attr "mode" "DI,SI,SI,<MODE>,SI,SI,SI")]) + +; Watch for partial overlap - register triples are not aligned. +(define_split + [(set (match_operand:S_MOV3_MODE 0 "register_operand") + (match_operand:S_MOV3_MODE 1 "nonmemory_operand"))] + "(REG_P (operands[0]) && REG_P (operands[1])) + && REGNO (operands[0]) > REGNO (operands[1]) + && REGNO (operands[0]) < REGNO (operands[1] + 3)" + [(set (match_dup 0) (match_dup 1)) + (set (match_dup 2) (match_dup 3)) + (set (match_dup 4) (match_dup 5))] +{ + gcn_split_operands (<MODE>mode, operands, 3, 2); +}) + + +(define_split + [(set (match_operand:S_MOV3_MODE 0 "register_operand") + (match_operand:S_MOV3_MODE 1 "nonmemory_operand"))] + "reload_completed" + [(set (match_dup 0) (match_dup 1)) + (set (match_dup 2) (match_dup 3))] +{ + operands[2] = gcn_operand_part (GET_MODE (operands[0]), operands[0], 2); + operands[3] = gcn_operand_part (GET_MODE (operands[0]), operands[1], 2); + operands[0] = gcn_operand_doublepart + (GET_MODE (operands[0]), operands[0], 0); + operands[1] = gcn_operand_doublepart + (GET_MODE (operands[0]), operands[1], 0); +}) + +(define_split + [(set (match_operand:S_MOV4_MODE 0 "register_operand") + (match_operand:S_MOV4_MODE 1 "nonmemory_operand"))] + "reload_completed" + [(set (match_dup 0) (match_dup 1)) + (set (match_dup 2) (match_dup 3))] +{ + operands[2] = gcn_operand_doublepart (GET_MODE (operands[0]), operands[0], 1); + operands[3] = gcn_operand_doublepart (GET_MODE (operands[0]), operands[1], 1); + operands[0] = gcn_operand_doublepart (GET_MODE (operands[0]), operands[0], 0); + operands[1] = gcn_operand_doublepart (GET_MODE (operands[0]), operands[1], 0); +}) + +;; ------------------------------------------------------------------------- +;; Prologue/Epilogue +;; ------------------------------------------------------------------------- + +(define_insn "prologue_use" + [(unspec_volatile [(match_operand 0)] UNSPECV_PROLOGUE_USE)] + "" + "") + +;; Flat scratch initialization. 
+(define_expand "prologue" + [(const_int 0)] + "" +{ + gcn_expand_prologue (); +}) + +(define_expand "epilogue" + [(simple_return)]) + +;; ------------------------------------------------------------------------- +;; Control flow +;; ------------------------------------------------------------------------- + +(define_insn "jump" + [(set (pc) + (label_ref (match_operand 0)))] + "" + "s_branch\t%0" + [(set_attr "type" "sopp")]) + +(define_insn "cjump" + [(set (pc) + (if_then_else (match_operator:BI 1 "gcn_conditional_operator" + [(match_operand:BI + 2 "gcn_conditional_register_operand" "ca") + (const_int 0)]) + (label_ref (match_operand 0)) + (pc)))] + "" + "s_cbranch%C1\t%0" + [(set_attr "type" "sopp")]) + +(define_insn "return" + [(simple_return)] + "" + "s_endpgm" + [(set_attr "type" "sopp")]) + +;; ------------------------------------------------------------------------- +;; Conditionals +;; ------------------------------------------------------------------------- + +(define_insn "cstoresi4" + [(set (match_operand:BI 0 "gcn_conditional_register_operand" "=cs,cs,cs,cs") + (match_operator:BI 1 "gcn_compare_operator" + [(match_operand:SI 2 "gcn_alu_operand" "SSA,SSA,SSB,SS") + (match_operand:SI 3 "gcn_alu_operand" "SSA,SSK,SS ,SSB")]))] + "" + "@ + s_cmp%D1\t%2, %3 + s_cmpk%D1\t%2, %3 + s_cmp%D1\t%2, %3 + s_cmp%D1\t%2, %3" + [(set_attr "type" "sopc,sopk,sopk,sopk") + (set_attr "mode" "SI")]) + +(define_expand "cbranchsi4" + [(match_operator 0 "gcn_compare_operator" + [(match_operand:SI 1 "gcn_alu_operand") + (match_operand:SI 2 "gcn_alu_operand")]) + (match_operand 3)] + "" +{ + rtx cc = gen_reg_rtx (BImode); + emit_insn (gen_cstoresi4 (cc, operands[0], operands[1], operands[2])); + emit_jump_insn (gen_cjump (operands[3], gen_rtx_NE (BImode, cc, const0_rtx), + cc)); + DONE; +}) + +; FIXME: s_cmp_eq_64 is not accepted by llvm-as. 
+ +(define_insn "cstoredi4_vec_and_scalar" + [(set (match_operand:BI 0 "gcn_conditional_register_operand" "=cs,cv,cv") + (match_operator:BI 1 "gcn_compare_64bit_operator" + [(match_operand:DI 2 "gcn_alu_operand" "%SSA,vSS,v") + (match_operand:DI 3 "gcn_alu_operand" " SSB,v, vB")])) + (use (match_operand:DI 4 "gcn_exec_operand" "n,e,e"))] + "0" + "@ + # + v_cmp%E1\tvcc, %3, %2 + v_cmp%E1\tvcc, %3, %2" + [(set_attr "type" "unknown,vopc,vopc") + (set_attr "mode" "DI")]) + +(define_split + [(set (match_operand:BI 0 "gcn_conditional_register_operand" "") + (match_operator:BI 1 "gcn_compare_64bit_operator" + [(match_operand:DI 2 "gcn_alu_operand" "") + (match_operand:DI 3 "gcn_alu_operand" "")])) + (use (match_operand:DI 4 "" ""))] + "REG_P (operands[0]) && REGNO (operands[0]) == SCC_REG && 0" + [(set (match_dup 0) + (match_op_dup 1 [(match_dup 2) (match_dup 3)]))]) + +(define_insn "cstoredi4_scalar" + [(set (match_operand:BI 0 "gcn_conditional_register_operand" "=cs") + (match_operator 1 "gcn_compare_64bit_operator" + [(match_operand:DI 2 "gcn_alu_operand" "%SSA") + (match_operand:DI 3 "gcn_alu_operand" "SSB")]))] + "0" + "s_cmp%D1\t%2, %3" + [(set_attr "type" "vopc") + (set_attr "mode" "DI")]) + +(define_insn "cstoredi4" + [(set (match_operand:BI 0 "gcn_conditional_register_operand" "=cv") + (match_operator:BI 1 "gcn_compare_operator" + [(match_operand:DI 2 "gcn_alu_operand" "vSSA") + (match_operand:DI 3 "gcn_alu_operand" "v")])) + (use (match_operand:DI 4 "gcn_exec_operand" "e"))] + "" + "v_cmp%E1\tvcc, %2, %3" + [(set_attr "type" "vopc") + (set_attr "mode" "DI")]) + +(define_expand "cbranchdi4" + [(match_operator 0 "gcn_compare_operator" + [(match_operand:DI 1 "gcn_alu_operand") + (match_operand:DI 2 "gcn_alu_operand")]) + (match_operand 3)] + "" +{ + rtx cc = gen_reg_rtx (BImode); + emit_insn (gen_cstoredi4 (cc, operands[0], operands[1], operands[2], + gcn_scalar_exec ())); + emit_jump_insn (gen_cjump (operands[3], gen_rtx_NE (BImode, cc, const0_rtx), + cc)); + DONE; +}) + +;; ------------------------------------------------------------------------- +;; ALU special cases: Plus +;; ------------------------------------------------------------------------- + +(define_code_iterator plus_minus [plus minus]) + +(define_expand "<expander>si3" + [(parallel [(set (match_operand:SI 0 "register_operand") + (plus_minus:SI + (match_operand:SI 1 "gcn_alu_operand") + (match_operand:SI 2 "gcn_alu_operand"))) + (use (match_dup 3)) + (clobber (reg:BI SCC_REG)) + (clobber (reg:CC VCC_REG))])] + "" +{ + operands[3] = gcn_scalar_exec (); +}) + +(define_insn "*addsi3_vec_and_scalar" + [(set (match_operand:SI 0 "register_operand" "=SD, SD, SD, v") + (plus:SI + (match_operand:SI 1 "gcn_alu_operand" "%SSA,0 ,SSA,v") + (match_operand:SI 2 "gcn_alu_operand" " SSA,SSJ,B, vBSS"))) + (use (match_operand:DI 3 "gcn_exec_operand" "n,n,n,v")) + (clobber (reg:BI SCC_REG)) + (clobber (reg:CC VCC_REG))] + "" + "#") + +(define_predicate "plus_minus_operator" + (match_code "plus,minus")) + +(define_split + [(set (match_operand:SIDI_MODE 0 "register_operand" "") + (match_operator:SIDI_MODE 3 "plus_minus_operator" + [(match_operand:SIDI_MODE 1 "gcn_alu_operand" "") + (match_operand:SIDI_MODE 2 "gcn_alu_operand" "")])) + (use (match_operand:DI 4 "" "")) + (clobber (reg:BI SCC_REG)) + (clobber (reg:CC VCC_REG))] + "gcn_sgpr_register_operand (operands[0], VOIDmode)" + [(parallel [(set (match_dup 0) (match_op_dup 3 [(match_dup 1) (match_dup 2)])) + (clobber (reg:BI SCC_REG))])]) + +(define_split + [(set (match_operand:SIDI_MODE 0 
"register_operand" "") + (match_operator:SIDI_MODE 3 "plus_minus_operator" + [(match_operand:SIDI_MODE 1 "gcn_alu_operand" "") + (match_operand:SIDI_MODE 2 "gcn_alu_operand" "")])) + (use (match_operand:DI 4 "" "")) + (clobber (reg:BI SCC_REG)) + (clobber (reg:CC VCC_REG))] + "gcn_vgpr_register_operand (operands[0], VOIDmode)" + [(parallel [(set (match_dup 0) (match_op_dup 3 [(match_dup 1) (match_dup 2)])) + (use (match_dup 4)) + (clobber (reg:CC VCC_REG))])]) + +(define_insn "*addsi3_scalar" + [(set (match_operand:SI 0 "register_operand" "=SD, SD, SD") + (plus:SI + (match_operand:SI 1 "gcn_alu_operand" "%SSA,0 ,SSA") + (match_operand:SI 2 "gcn_alu_operand" " SSA,SSJ,B"))) + (clobber (reg:BI SCC_REG))] + "" + "@ + s_add_i32\t%0, %1, %2 + s_addk_i32\t%0, %2 + s_add_i32\t%0, %1, %2" + [(set_attr "type" "sop2,sopk,sop2") + (set_attr "mode" "SI")]) + + +; FIXME: Implemented for now only in SCC registers. Vectors are analogous +; but we need to expand into vector patterns. +(define_expand "adddi3" + [(match_operand:DI 0 "register_operand") + (match_operand:DI 1 "gcn_alu_operand") + (match_operand:DI 2 "gcn_alu_operand")] + "" +{ + emit_insn (gen_addsi3_scalar_carry ( + gcn_operand_part (DImode, operands[0], 0), + gcn_operand_part (DImode, operands[1], 0), + gcn_operand_part (DImode, operands[2], 0))); + rtx val = gcn_operand_part (DImode, operands[2], 1); + if (val != const0_rtx) + emit_insn (gen_addcsi3_scalar ( + gcn_operand_part (DImode, operands[0], 1), + gcn_operand_part (DImode, operands[1], 1), + gcn_operand_part (DImode, operands[2], 1))); + else + emit_insn (gen_addcsi3_scalar_zero ( + gcn_operand_part (DImode, operands[0], 1), + gcn_operand_part (DImode, operands[1], 1))); + DONE; +}) + +(define_insn "addsi3_scalar_carry" + [(set (match_operand:SI 0 "register_operand" "=SD") + (plus:SI (match_operand:SI 1 "gcn_alu_operand" "%SSA") + (match_operand:SI 2 "gcn_alu_operand" "SSB"))) + (set (reg:BI SCC_REG) + (ltu:BI (plus:SI (match_dup 1) + (match_dup 2)) + (match_dup 1)))] + "" + "s_add_u32\t%0, %1, %2" + [(set_attr "type" "sop2") + (set_attr "mode" "SI")]) + +(define_insn "addsi3_scalar_carry_cst" + [(set (match_operand:SI 0 "register_operand" "=SD") + (plus:SI (match_operand:SI 1 "gcn_alu_operand" "SSA") + (match_operand:SI 2 "const_int_operand" "n"))) + (set (reg:BI SCC_REG) + (geu:BI (plus:SI (match_dup 1) + (match_dup 2)) + (match_operand:SI 3 "const_int_operand" "n")))] + "INTVAL (operands[2]) == -INTVAL (operands[3])" + "s_add_u32\t%0, %1, %2" + [(set_attr "type" "sop2") + (set_attr "mode" "SI")]) + +(define_insn "addcsi3_scalar" + [(set (match_operand:SI 0 "register_operand" "=SD") + (plus:SI (plus:SI (zero_extend:SI (reg:BI SCC_REG)) + (match_operand:SI 1 "gcn_alu_operand" "%SSA")) + (match_operand:SI 2 "gcn_alu_operand" "SSB"))) + (set (reg:BI SCC_REG) + (ior:BI (ltu:BI (plus:SI (plus:SI (zero_extend:SI (reg:BI SCC_REG)) + (match_dup 1)) + (match_dup 2)) + (match_dup 2)) + (ltu:BI (plus:SI (zero_extend:SI (reg:BI SCC_REG)) (match_dup 1)) + (match_dup 1))))] + "" + "s_addc_u32\t%0, %1, %2" + [(set_attr "type" "sop2") + (set_attr "mode" "SI")]) + +(define_insn "addcsi3_scalar_zero" + [(set (match_operand:SI 0 "register_operand" "=SD") + (plus:SI (zero_extend:SI (reg:BI SCC_REG)) + (match_operand:SI 1 "gcn_alu_operand" "SSA"))) + (set (reg:BI SCC_REG) + (ltu:BI (plus:SI (zero_extend:SI (reg:BI SCC_REG)) (match_dup 1)) + (match_dup 1)))] + "" + "s_addc_u32\t%0, %1, 0" + [(set_attr "type" "sop2") + (set_attr "mode" "SI")]) + +;; 
------------------------------------------------------------------------- +;; ALU special cases: Minus +;; ------------------------------------------------------------------------- + +(define_expand "subdi3" + [(match_operand:DI 0 "register_operand") + (match_operand:DI 1 "gcn_alu_operand") + (match_operand:DI 2 "gcn_alu_operand")] + "" +{ + emit_insn (gen_subsi3_scalar_carry ( + gcn_operand_part (DImode, operands[0], 0), + gcn_operand_part (DImode, operands[1], 0), + gcn_operand_part (DImode, operands[2], 0))); + rtx val = gcn_operand_part (DImode, operands[2], 1); + if (val != const0_rtx) + emit_insn (gen_subcsi3_scalar ( + gcn_operand_part (DImode, operands[0], 1), + gcn_operand_part (DImode, operands[1], 1), + gcn_operand_part (DImode, operands[2], 1))); + else + emit_insn (gen_subcsi3_scalar_zero ( + gcn_operand_part (DImode, operands[0], 1), + gcn_operand_part (DImode, operands[1], 1))); + DONE; +}) + +(define_insn "subsi3_scalar_carry" + [(set (match_operand:SI 0 "register_operand" "=SD, SD") + (minus:SI (match_operand:SI 1 "gcn_alu_operand" "SSA,SSB") + (match_operand:SI 2 "gcn_alu_operand" "SSB,SSA"))) + (set (reg:BI SCC_REG) + (gtu:BI (minus:SI (match_dup 1) + (match_dup 2)) + (match_dup 1)))] + "" + "s_sub_u32\t%0, %1, %2" + [(set_attr "type" "sop2") + (set_attr "mode" "SI")]) + +(define_insn "subsi3_scalar_carry_cst" + [(set (match_operand:SI 0 "register_operand" "=SD") + (minus:SI (match_operand:SI 1 "gcn_alu_operand" "SSA") + (match_operand:SI 2 "const_int_operand" "n"))) + (set (reg:BI SCC_REG) + (leu:BI (minus:SI (match_dup 1) + (match_dup 2)) + (match_operand:SI 3 "const_int_operand" "n")))] + "INTVAL (operands[2]) == -INTVAL (operands[3])" + "s_sub_u32\t%0, %1, %2" + [(set_attr "type" "sop2") + (set_attr "mode" "SI")]) + +(define_insn "subcsi3_scalar" + [(set (match_operand:SI 0 "register_operand" "=SD, SD") + (minus:SI (minus:SI (zero_extend:SI (reg:BI SCC_REG)) + (match_operand:SI 1 "gcn_alu_operand" "SSA,SSB")) + (match_operand:SI 2 "gcn_alu_operand" "SSB,SSA"))) + (set (reg:BI SCC_REG) + (ior:BI (gtu:BI (minus:SI (minus:SI (zero_extend:SI (reg:BI SCC_REG)) + (match_dup 1)) + (match_dup 2)) + (match_dup 1)) + (gtu:BI (minus:SI (zero_extend:SI (reg:BI SCC_REG)) (match_dup 1)) + (match_dup 1))))] + "" + "s_subb_u32\t%0, %1, %2" + [(set_attr "type" "sop2") + (set_attr "mode" "SI")]) + +(define_insn "subcsi3_scalar_zero" + [(set (match_operand:SI 0 "register_operand" "=SD") + (minus:SI (zero_extend:SI (reg:BI SCC_REG)) + (match_operand:SI 1 "gcn_alu_operand" "SSA"))) + (set (reg:BI SCC_REG) + (gtu:BI (minus:SI (zero_extend:SI (reg:BI SCC_REG)) (match_dup 1)) + (match_dup 1)))] + "" + "s_subb_u32\t%0, %1, 0" + [(set_attr "type" "sop2") + (set_attr "mode" "SI")]) + +;; ------------------------------------------------------------------------- +;; ALU: mult +;; ------------------------------------------------------------------------- + +(define_expand "mulsi3" + [(set (match_operand:SI 0 "register_operand") + (mult:SI + (match_operand:SI 1 "gcn_alu_operand") + (match_operand:SI 2 "gcn_alu_operand"))) + (use (match_dup 3))] +"" +{ + operands[3] = gcn_scalar_exec (); +}) + +; Vector multiply has vop3a encoding, but no corresponding vop2a, so no long +; immediate. 
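+; The SGPR alternatives of the insn below are placeholders ("#") that are
+; split to the *mulsi3_scalar pattern; only the VGPR alternative is emitted
+; directly as v_mul_lo_i32.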
+(define_insn_and_split "*mulsi3_vec_and_scalar" + [(set (match_operand:SI 0 "register_operand" "=SD, SD,SD, v") + (mult:SI + (match_operand:SI 1 "gcn_alu_operand" "%SSA,0 ,SSA,v") + (match_operand:SI 2 "gcn_alu_operand" " SSA,J, B, vASS"))) + (use (match_operand:DI 3 "gcn_exec_operand" "n,n,n,e"))] + "" + "@ + # + # + # + v_mul_lo_i32\t%0, %1, %2" + "gcn_sgpr_register_operand (operands[0], VOIDmode)" + [(set (match_operand:SI 0 "register_operand" "=SD, SD,SD, v") + (mult:SI + (match_operand:SI 1 "gcn_alu_operand" "%SSA,0 ,SSA,vSS") + (match_operand:SI 2 "gcn_alu_operand" " SSA,J, B, vA")))] +{} + [(set_attr "type" "sop2,sopk,sop2,vop3a") + (set_attr "mode" "SI")]) + +(define_insn "*mulsi3_scalar" + [(set (match_operand:SI 0 "register_operand" "=SD, SD,SD") + (mult:SI + (match_operand:SI 1 "gcn_alu_operand" "%SSA,0 ,SSA") + (match_operand:SI 2 "gcn_alu_operand" " SSA,J, B")))] + "" + "@ + s_mul_i32\t%0, %1, %2 + s_mulk_i32\t%0, %2 + s_mul_i32\t%0, %1, %2" + [(set_attr "type" "sop2,sopk,sop2") + (set_attr "mode" "SI")]) + +;; ------------------------------------------------------------------------- +;; ALU the generic 32bit case +;; ------------------------------------------------------------------------- + +(define_code_iterator vec_and_scalar [and ior xor ashift lshiftrt + ashiftrt smin smax umin umax]) + +(define_expand "<expander>si3" + [(parallel [(set (match_operand:SI 0 "register_operand") + (vec_and_scalar:SI + (match_operand:SI 1 "gcn_alu_operand") + (match_operand:SI 2 "gcn_alu_operand"))) + (use (match_dup 3)) + (clobber (reg:BI SCC_REG))])] + "" +{ + operands[3] = gcn_scalar_exec (); +}) + +;; No plus and mult - they have variant with 16bit immediate and thus are defined later. +(define_code_iterator vec_and_scalar_com [and ior xor smin smax umin umax]) + +(define_insn "*<expander>si3" + [(set (match_operand:SI 0 "register_operand" "=SD,v") + (vec_and_scalar_com:SI (match_operand:SI 1 "gcn_alu_operand" "%SSA,v") + (match_operand:SI 2 "gcn_alu_operand" "SSB,vSSB"))) + (use (match_operand:DI 3 "gcn_exec_operand" "n,e")) + (clobber (reg:BI SCC_REG))] + "" + "#" + [(set_attr "type" "sop2,vop2") + (set_attr "mode" "SI")]) + +(define_insn "*<expander>si3_scalar" + [(set (match_operand:SI 0 "register_operand" "=SD,v") + (vec_and_scalar_com:SI (match_operand:SI 1 "register_operand" "%SSA,v") + (match_operand:SI 2 "gcn_alu_operand" "SSB,vSSB"))) + (clobber (reg:BI SCC_REG))] + "" + "s_<mnemonic>0\t%0, %1, %2" + [(set_attr "type" "sop2") + (set_attr "mode" "SI")]) + +(define_code_iterator vec_and_scalar_nocom [ashift lshiftrt ashiftrt]) + +(define_insn "*<expander>si3_vec_and_scalar" + [(set (match_operand:SI 0 "register_operand" "=SD,SD,v") + (vec_and_scalar_nocom:SI (match_operand:SI 1 "gcn_alu_operand" "SSB,SSA,v") + (match_operand:SI 2 "gcn_alu_operand" "SSA,SSB,vSSB"))) + (use (match_operand:DI 3 "gcn_exec_operand" "n,n,e")) + (clobber (reg:BI SCC_REG))] + "" + "#" + [(set_attr "type" "sop2,sop2,vop2") + (set_attr "mode" "SI")]) + +(define_insn "<expander>si3_scalar" + [(set (match_operand:SI 0 "register_operand" "=SD,SD") + (vec_and_scalar_nocom:SI (match_operand:SI 1 "gcn_alu_operand" "SSB,SSA") + (match_operand:SI 2 "gcn_alu_operand" "SSA,SSB"))) + (clobber (reg:BI SCC_REG))] + "" + "@ + s_<mnemonic>0\t%0, %1, %2 + s_<mnemonic>0\t%0, %1, %2" + [(set_attr "type" "sop2,sop2") + (set_attr "mode" "SI")]) + +;; ------------------------------------------------------------------------- +;; ALU the generic 64bit case +;; 
------------------------------------------------------------------------- + +(define_code_iterator vec_and_scalar64_com [and ior xor]) + +(define_expand "<expander>di3" + [(set (match_operand:DI 0 "register_operand") + (vec_and_scalar64_com:DI (match_operand:DI 1 "gcn_alu_operand") + (match_operand:DI 2 "gcn_alu_operand"))) + (use (match_dup 3)) + (clobber (reg:BI SCC_REG))] + "" +{ + operands[3] = gcn_scalar_exec (); +}) + +(define_insn "*<expander>di3_vec_and_scalar" + [(set (match_operand:DI 0 "register_operand" "=SD,v") + (vec_and_scalar64_com:DI (match_operand:DI 1 "register_operand" "%SSA,v") + (match_operand:DI 2 "gcn_alu_operand" "SSB,vSSB"))) + (use (match_operand:DI 3 "gcn_exec_operand" "n,e")) + (clobber (reg:BI SCC_REG))] + "" + "#" + [(set_attr "type" "sop2,vop2") + (set_attr "mode" "DI")]) + +(define_insn "*<expander>di3_scalar" + [(set (match_operand:DI 0 "register_operand" "=SD") + (vec_and_scalar64_com:DI (match_operand:DI 1 "register_operand" "%SSA") + (match_operand:DI 2 "gcn_alu_operand" "SSB"))) + (clobber (reg:BI SCC_REG))] + "" + "s_<mnemonic>0\t%0, %1, %2" + [(set_attr "type" "sop2") + (set_attr "mode" "DI")]) + +(define_expand "<expander>di3" + [(set (match_operand:DI 0 "register_operand") + (vec_and_scalar_nocom:DI (match_operand:DI 1 "gcn_alu_operand") + (match_operand:DI 2 "gcn_alu_operand"))) + (use (match_dup 3)) + (clobber (reg:BI SCC_REG))] + "" +{ + operands[3] = gcn_scalar_exec (); +}) + +(define_insn "*<expander>di3_vec_and_scalar" + [(set (match_operand:DI 0 "register_operand" "=SD,SD,v") + (vec_and_scalar_nocom:DI + (match_operand:DI 1 "gcn_alu_operand" "SSB,SSA,v") + (match_operand:SI 2 "gcn_alu_operand" "SSA,SSB,vSSB"))) + (clobber (reg:BI SCC_REG))] + "" + "s_<mnemonic>0\t%0, %1, %2" + [(set_attr "type" "sop2,sop2,vop2") + (set_attr "mode" "DI")]) + +(define_insn "*<expander>di3_scalar" + [(set (match_operand:DI 0 "register_operand" "=SD,SD") + (vec_and_scalar_nocom:DI + (match_operand:DI 1 "gcn_alu_operand" "SSB,SSA") + (match_operand:SI 2 "gcn_alu_operand" "SSA,SSB"))) + (clobber (reg:BI SCC_REG))] + "" + "s_<mnemonic>0\t%0, %1, %2" + [(set_attr "type" "sop2,sop2") + (set_attr "mode" "DI")]) + +;; ------------------------------------------------------------------------- +;; Generic splitters choosing proper insn variant once we decided on using +;; vector or scalar ALU +;; ------------------------------------------------------------------------- + +(define_split + [(set (match_operand 0 "gcn_sgpr_register_operand") + (match_operator 4 "binary_operator" + [(match_operand 1 "gcn_alu_operand") + (match_operand 2 "gcn_alu_operand")])) + (use (match_operand:DI 3 "")) + (clobber (reg:BI SCC_REG))] + "" + [(parallel [(set (match_dup 0) + (match_op_dup 4 [(match_dup 1) (match_dup 2)])) + (clobber (reg:BI SCC_REG))])]) + +(define_split + [(set (match_operand 0 "gcn_vgpr_register_operand") + (match_operator 4 "binary_operator" + [(match_operand 1 "gcn_alu_operand") + (match_operand 2 "gcn_alu_operand")])) + (use (match_operand:DI 3 "")) + (clobber (reg:BI SCC_REG))] + "" + [(parallel [(set (match_dup 0) + (match_op_dup 4 [(match_dup 1) (match_dup 2)])) + (use (match_dup 3))])]) + + +(include "gcn-valu.md") diff --git a/gcc/config/gcn/gcn.opt b/gcc/config/gcn/gcn.opt new file mode 100644 index 00000000000..ffb5547adbd --- /dev/null +++ b/gcc/config/gcn/gcn.opt @@ -0,0 +1,40 @@ +; Options for the GCN port of the compiler. + +; Copyright (C) 2016-2017 Free Software Foundation, Inc. +; +; This file is part of GCC. 
+; +; GCC is free software; you can redistribute it and/or modify it under +; the terms of the GNU General Public License as published by the Free +; Software Foundation; either version 3, or (at your option) any later +; version. +; +; GCC is distributed in the hope that it will be useful, but WITHOUT ANY +; WARRANTY; without even the implied warranty of MERCHANTABILITY or +; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +; for more details. +; +; You should have received a copy of the GNU General Public License +; along with GCC; see the file COPYING3. If not see +; <http://www.gnu.org/licenses/>. + +HeaderInclude +config/gcn/gcn-opts.h + +Enum +Name(gpu_type) Type(enum processor_type) +GCN GPU type to use: + +EnumValue +Enum(gpu_type) String(carrizo) Value(PROCESSOR_CARRIZO) + +EnumValue +Enum(gpu_type) String(fiji) Value(PROCESSOR_FIJI) + +march= +Target RejectNegative Joined ToLower Enum(gpu_type) Var(gcn_arch) Init(PROCESSOR_CARRIZO) +Specify the name of the target GPU. + +mtune= +Target RejectNegative Joined ToLower Enum(gpu_type) Var(gcn_tune) Init(PROCESSOR_CARRIZO) +Specify the name of the target GPU. diff --git a/gcc/config/gcn/predicates.md b/gcc/config/gcn/predicates.md new file mode 100644 index 00000000000..137c39c69be --- /dev/null +++ b/gcc/config/gcn/predicates.md @@ -0,0 +1,167 @@ +;; Predicate definitions for GCN. +;; Copyright (C) 2016-2017 Free Software Foundation, Inc. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. +;; Return true if VALUE can be stored in a sign extended immediate field. 
+ +(define_predicate "gcn_16bit_immediate_operand" + (and (match_code "const_int") + (match_test "satisfies_constraint_I (op)"))) + +(define_predicate "gcn_conditional_register_operand" + (match_operand 0 "register_operand") +{ + if (GET_CODE (op) == SUBREG) + op = SUBREG_REG (op); + + if (!REG_P (op)) + return 0; + + return REGNO (op) == VCCZ_REG + || REGNO (op) == SCC_REG + || REGNO (op) == EXECZ_REG + || REGNO (op) >= FIRST_PSEUDO_REGISTER; +}) + +(define_predicate "gcn_sgpr_register_operand" + (match_operand 0 "register_operand") +{ + if (GET_CODE (op) == SUBREG) + op = SUBREG_REG (op); + + if (!REG_P (op)) + return 0; + + return REGNO (op) < FIRST_PSEUDO_REGISTER && !VGPR_REGNO_P (REGNO (op)); +}) + +(define_predicate "gcn_simple_mem_or_reg_operand" + (match_operand 0 "nonimmediate_operand") +{ + if (GET_CODE (op) == MEM + && GET_CODE (XEXP (op, 0)) != REG) + return false; + return true; +}) + +(define_predicate "gcn_vgpr_register_operand" + (match_operand 0 "register_operand") +{ + if (GET_CODE (op) == SUBREG) + op = SUBREG_REG (op); + + if (!REG_P (op)) + return 0; + + return VGPR_REGNO_P (REGNO (op)); +}) + +(define_predicate "gcn_inline_immediate_operand" + (match_code "const_int,const_double,const_vector") +{ + return gcn_inline_constant_p (op); +}) + +(define_predicate "gcn_vec0_operand" + (match_code "const_vector") +{ + return CONST_VECTOR_ELT (op, 0) == const0_rtx && gcn_inline_constant_p (op); +}) + +(define_predicate "gcn_vec1_operand" + (match_code "const_vector") +{ + return CONST_VECTOR_ELT (op, 0) == const1_rtx && gcn_inline_constant_p (op); +}) + +(define_predicate "gcn_32bit_immediate_operand" + (match_code "const_int,const_double,const_vector") +{ + return gcn_constant_p (op); +}) + +; LRA works smoother when exec values are immediate constants +; prior register allocation. 
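+; Hence gcn_exec_operand accepts a const_int as well as a register, while
+; gcn_exec_reg_operand requires a register.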
+(define_predicate "gcn_exec_operand" + (ior (match_operand 0 "register_operand") + (match_code "const_int"))) + +(define_predicate "gcn_exec_reg_operand" + (match_operand 0 "register_operand")) + +(define_predicate "gcn_load_operand" + (ior (match_operand 0 "nonimmediate_operand") + (match_operand 0 "gcn_32bit_immediate_operand"))) + +(define_predicate "gcn_alu_operand" + (ior (match_operand 0 "register_operand") + (match_operand 0 "gcn_32bit_immediate_operand"))) + +(define_predicate "gcn_ds_memory_operand" + (and (match_code "mem") + (and (match_test "MEM_ADDR_SPACE (op) == ADDR_SPACE_LDS || MEM_ADDR_SPACE (op) == ADDR_SPACE_GDS") + (match_operand 0 "memory_operand")))) + +(define_predicate "gcn_valu_dst_operand" + (ior (match_operand 0 "register_operand") + (match_operand 0 "gcn_ds_memory_operand"))) + +(define_predicate "gcn_valu_src0_operand" + (ior (match_operand 0 "register_operand") + (ior (match_operand 0 "gcn_32bit_immediate_operand") + (match_operand 0 "gcn_ds_memory_operand")))) + +(define_predicate "gcn_valu_src1_operand" + (match_operand 0 "register_operand")) + +(define_predicate "gcn_valu_src1com_operand" + (ior (match_operand 0 "register_operand") + (match_operand 0 "gcn_32bit_immediate_operand"))) + +(define_predicate "gcn_conditional_operator" + (match_code "eq,ne")) + +(define_predicate "gcn_compare_64bit_operator" + (match_code "eq,ne")) + +(define_predicate "gcn_compare_operator" + (match_code "eq,ne,gt,ge,lt,le,gtu,geu,ltu,leu")) + +(define_predicate "vec_and_scalar_commutative_64bit_operator" + (match_code "and,ior,xor")) + +(define_predicate "vec_and_scalar_64bit_operator" + (match_code "and,ior,xor,ashift,lshiftrt,ashiftrt")) + +(define_predicate "binary_operator" + (match_code "and,ior,xor,ashift,lshiftrt,ashiftrt")) + +(define_predicate "gcn_register_or_unspec_operand" + (ior (match_operand 0 "register_operand") + (and (match_code "unspec") + (match_test "XINT (op, 1) == UNSPEC_VECTOR")))) + +(define_predicate "gcn_register_ds_or_unspec_operand" + (ior (match_operand 0 "register_operand") + (ior (match_operand 0 "gcn_ds_memory_operand") + (and (match_code "unspec") + (match_test "XINT (op, 1) == UNSPEC_VECTOR"))))) + +(define_predicate "gcn_buffer_memory_operand" + (and (match_code "mem") + (and (match_test "MEM_ADDR_SPACE (op) == ADDR_SPACE_SCRATCH") + (match_operand 0 "memory_operand")))) diff --git a/gcc/config/gcn/t-gcn-elf b/gcc/config/gcn/t-gcn-elf new file mode 100644 index 00000000000..2b378e2f8eb --- /dev/null +++ b/gcc/config/gcn/t-gcn-elf @@ -0,0 +1,21 @@ +# Copyright (C) 2016-2017 Free Software Foundation, Inc. +# +# This file is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation; either version 3 of the License, or (at your option) +# any later version. +# +# This file is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +# for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# <http://www.gnu.org/licenses/>. + +# Multi-lib support. 
+ +gcn-c.o: $(srcdir)/config/gcn/gcn-c.c + $(COMPILE) $< + $(POSTCOMPILE) diff --git a/gcc/emit-rtl.c b/gcc/emit-rtl.c index 6951f61703b..e86c7ba4f4b 100644 --- a/gcc/emit-rtl.c +++ b/gcc/emit-rtl.c @@ -1688,6 +1688,9 @@ get_mem_align_offset (rtx mem, unsigned int align) tree expr; unsigned HOST_WIDE_INT offset; + if (MEM_ALIGN (mem) >= align) + return 0; + /* This function can't use if (!MEM_EXPR (mem) || !MEM_OFFSET_KNOWN_P (mem) || (MAX (MEM_ALIGN (mem), diff --git a/gcc/explow.c b/gcc/explow.c index 50074e281ed..b38664c0e36 100644 --- a/gcc/explow.c +++ b/gcc/explow.c @@ -401,7 +401,14 @@ memory_address_addr_space (machine_mode mode, rtx x, addr_space_t as) /* By passing constant addresses through registers we get a chance to cse them. */ if (! cse_not_expected && CONSTANT_P (x) && CONSTANT_ADDRESS_P (x)) - x = force_reg (address_mode, x); + { + x = force_reg (address_mode, x); + /* Most usually register is fine for memory expression. + For GCN scalar registers are not always valid way to address + memory and needs to be converted to vector pointers. */ + if (!memory_address_addr_space_p (mode, x, as)) + return memory_address_addr_space (mode, x, as); + } /* We get better cse by rejecting indirect addressing at this stage. Let the combiner create indirect addresses where appropriate. diff --git a/gcc/expr.c b/gcc/expr.c index 0e8216ba7d0..8abee9434bd 100644 --- a/gcc/expr.c +++ b/gcc/expr.c @@ -10458,6 +10458,8 @@ expand_expr_real_1 (tree exp, rtx target, machine_mode tmode, VOIDmode, modifier == EXPAND_SUM ? EXPAND_NORMAL : modifier, NULL, true); +#if 0 + /* FIXME: Deal with this in another way. */ /* If the field has a mode, we want to access it in the field's mode, not the computed mode. @@ -10470,6 +10472,7 @@ expand_expr_real_1 (tree exp, rtx target, machine_mode tmode, else if (GET_MODE (op0) == VOIDmode) op0 = adjust_address (op0, BLKmode, 0); } +#endif mode2 = CONSTANT_P (op0) ? TYPE_MODE (TREE_TYPE (tem)) : GET_MODE (op0); diff --git a/gcc/ira-costs.c b/gcc/ira-costs.c index 2cd102a0810..8c6c1346f3b 100644 --- a/gcc/ira-costs.c +++ b/gcc/ira-costs.c @@ -2145,7 +2145,13 @@ setup_allocno_class_and_costs (void) if (num < 0) { num = cost_classes_ptr->hard_regno_index[hard_regno]; - ira_assert (num >= 0); + /* If the class cannot hold register of given mode, + we do not care. */ + if (num == -1) + { + reg_costs[j] = 0; + continue; + } } reg_costs[j] = COSTS (costs, i)->cost[num]; } diff --git a/gcc/ira.c b/gcc/ira.c index 08a1cc550b2..f91fcb9e5fc 100644 --- a/gcc/ira.c +++ b/gcc/ira.c @@ -1637,14 +1637,16 @@ ira_init_register_move_cost (machine_mode mode) *p2 != LIM_REG_CLASSES; p2++) if (ira_class_hard_regs_num[*p2] > 0 && (ira_reg_class_max_nregs[*p2][mode] - <= ira_class_hard_regs_num[*p2])) + <= ira_class_hard_regs_num[*p2]) + && contains_reg_of_mode[*p2][mode]) cost = MAX (cost, ira_register_move_cost[mode][cl1][*p2]); for (p1 = ®_class_subclasses[cl1][0]; *p1 != LIM_REG_CLASSES; p1++) if (ira_class_hard_regs_num[*p1] > 0 && (ira_reg_class_max_nregs[*p1][mode] - <= ira_class_hard_regs_num[*p1])) + <= ira_class_hard_regs_num[*p1]) + && contains_reg_of_mode[*p1][mode]) cost = MAX (cost, ira_register_move_cost[mode][*p1][cl2]); ira_assert (cost <= 65535); diff --git a/gcc/lra-constraints.c b/gcc/lra-constraints.c index b1d864fb974..13d2bf2e0a1 100644 --- a/gcc/lra-constraints.c +++ b/gcc/lra-constraints.c @@ -2657,7 +2657,8 @@ process_alt_operands (int only_alternative) constant into memory and it will then win since we don't want to have a different alternative match then. 
*/ - if (! (REG_P (op) && REGNO (op) >= FIRST_PSEUDO_REGISTER)) + if (! (REG_P (op) && REGNO (op) >= FIRST_PSEUDO_REGISTER) + && !CONSTANT_P (op)) { if (lra_dump_file != NULL) fprintf @@ -2754,8 +2755,10 @@ process_alt_operands (int only_alternative) #endif /* Input reloads can be inherited more often than output reloads can be removed, so penalize output - reloads. */ - if (!REG_P (op) || curr_static_id->operand[nop].type != OP_IN) + reloads and also input reloads that are not constants or + registers. */ + if ((!REG_P (op) && !CONSTANT_P (op)) + || curr_static_id->operand[nop].type != OP_IN) { if (lra_dump_file != NULL) fprintf @@ -4170,6 +4173,7 @@ curr_insn_transform (bool check_only_p) && (curr_insn_set == NULL_RTX || !((REG_P (SET_SRC (curr_insn_set)) || MEM_P (SET_SRC (curr_insn_set)) + || GET_CODE (SET_SRC (curr_insn_set)) == CONST_INT || GET_CODE (SET_SRC (curr_insn_set)) == SUBREG) && (REG_P (SET_DEST (curr_insn_set)) || MEM_P (SET_DEST (curr_insn_set)) diff --git a/gcc/lra-int.h b/gcc/lra-int.h index 405071708b1..5a519b02492 100644 --- a/gcc/lra-int.h +++ b/gcc/lra-int.h @@ -189,9 +189,9 @@ struct lra_static_insn_data int commutative; /* Number of operands, duplications, and alternatives of the insn. */ - char n_operands; - char n_dups; - char n_alternatives; + unsigned char n_operands; + unsigned char n_dups; + unsigned char n_alternatives; /* Insns in machine description (or clobbers in asm) may contain explicit hard regs which are not operands. The following list describes such hard registers. */ diff --git a/gcc/print-rtl.c b/gcc/print-rtl.c index dc8d9800de5..7a9acfc9a73 100644 --- a/gcc/print-rtl.c +++ b/gcc/print-rtl.c @@ -343,7 +343,20 @@ rtx_writer::print_rtx_operand_codes_E_and_V (const_rtx in_rtx, int idx) m_sawclose = 1; for (int j = 0; j < XVECLEN (in_rtx, idx); j++) - print_rtx (XVECEXP (in_rtx, idx, j)); + { + int j1; + + print_rtx (XVECEXP (in_rtx, idx, j)); + for (j1 = j + 1; j1 < XVECLEN (in_rtx, idx); j1++) + if (XVECEXP (in_rtx, idx, j) != XVECEXP (in_rtx, idx, j1)) + break; + + if (j1 != j + 1) + { + fprintf (m_outfile, " repeated %ix", j1 - j); + j = j1; + } + } m_indent -= 2; } diff --git a/gcc/recog.h b/gcc/recog.h index 07c60feffc6..469e5ab1109 100644 --- a/gcc/recog.h +++ b/gcc/recog.h @@ -224,26 +224,16 @@ struct recog_data_d /* Gives the operand number that was duplicated in the Nth duplicate-appearance of an operand. */ - char dup_num[MAX_DUP_OPERANDS]; - - /* ??? Note that these are `char' instead of `unsigned char' to (try to) - avoid certain lossage from K&R C, wherein `unsigned char' default - promotes to `unsigned int' instead of `int' as in ISO C. As of 1999, - the most common places to bootstrap from K&R C are SunOS and HPUX, - both of which have signed characters by default. The only other - supported natives that have both K&R C and unsigned characters are - ROMP and Irix 3, and neither have been seen for a while, but do - continue to consider unsignedness when performing arithmetic inside - a comparison. */ + unsigned char dup_num[MAX_DUP_OPERANDS]; /* The number of operands of the insn. */ - char n_operands; + unsigned char n_operands; /* The number of MATCH_DUPs in the insn. */ - char n_dups; + unsigned char n_dups; /* The number of alternatives in the constraints for the insn. */ - char n_alternatives; + unsigned char n_alternatives; /* True if insn is ASM_OPERANDS. 
*/ bool is_asm; @@ -368,10 +358,10 @@ struct insn_data_d const insn_gen_fn genfun; const struct insn_operand_data *const operand; - const char n_generator_args; - const char n_operands; - const char n_dups; - const char n_alternatives; + const unsigned char n_generator_args; + const unsigned char n_operands; + const unsigned char n_dups; + const unsigned char n_alternatives; const char output_format; }; diff --git a/gcc/reload1.c b/gcc/reload1.c index e993749a000..9292a7c1fd5 100644 --- a/gcc/reload1.c +++ b/gcc/reload1.c @@ -3604,7 +3604,7 @@ elimination_costs_in_insn (rtx_insn *insn) rtx old_set = single_set (insn); int i; rtx orig_operand[MAX_RECOG_OPERANDS]; - rtx orig_dup[MAX_RECOG_OPERANDS]; + rtx orig_dup[MAX_DUP_OPERANDS]; struct elim_table *ep; rtx plus_src, plus_cst_src; bool sets_reg_p; diff --git a/gcc/simplify-rtx.c b/gcc/simplify-rtx.c index ef414797af5..06a3f3cd718 100644 --- a/gcc/simplify-rtx.c +++ b/gcc/simplify-rtx.c @@ -5391,6 +5391,65 @@ simplify_cond_clz_ctz (rtx x, rtx_code cmp_code, rtx true_val, rtx false_val) return NULL_RTX; } +/* X is an operand number OP of VEC_MERGE operation with MASK. + Try to simplify using knowledge that values outside of MASK + will not be used. */ + +rtx +simplify_merge_mask (rtx x, rtx mask, int op) +{ + gcc_assert (VECTOR_MODE_P (GET_MODE (x))); + int nunits = GET_MODE_NUNITS (GET_MODE (x)); + if (GET_CODE (x) == VEC_MERGE && rtx_equal_p (XEXP (x, 2), mask)) + { + if (!side_effects_p (XEXP (x, 1 - op))) + return XEXP (x, op); + } + if (side_effects_p (x)) + return NULL_RTX; + if (UNARY_P (x) + && VECTOR_MODE_P (GET_MODE (XEXP (x, 0))) + && GET_MODE_NUNITS (GET_MODE (XEXP (x, 0))) == nunits) + { + rtx top0 = simplify_merge_mask (XEXP (x, 0), mask, op); + if (top0) + return simplify_gen_unary (GET_CODE (x), GET_MODE (x), top0, + GET_MODE (XEXP (x, 0))); + } + if (BINARY_P (x) + && VECTOR_MODE_P (GET_MODE (XEXP (x, 0))) + && GET_MODE_NUNITS (GET_MODE (XEXP (x, 0))) == nunits + && VECTOR_MODE_P (GET_MODE (XEXP (x, 1))) + && GET_MODE_NUNITS (GET_MODE (XEXP (x, 1))) == nunits) + { + rtx top0 = simplify_merge_mask (XEXP (x, 0), mask, op); + rtx top1 = simplify_merge_mask (XEXP (x, 1), mask, op); + if (top0 || top1) + return simplify_gen_binary (GET_CODE (x), GET_MODE (x), + top0 ? top0 : XEXP (x, 0), + top1 ? top1 : XEXP (x, 1)); + } + if (GET_RTX_CLASS (GET_CODE (x)) == RTX_TERNARY + && VECTOR_MODE_P (GET_MODE (XEXP (x, 0))) + && GET_MODE_NUNITS (GET_MODE (XEXP (x, 0))) == nunits + && VECTOR_MODE_P (GET_MODE (XEXP (x, 1))) + && GET_MODE_NUNITS (GET_MODE (XEXP (x, 1))) == nunits + && VECTOR_MODE_P (GET_MODE (XEXP (x, 2))) + && GET_MODE_NUNITS (GET_MODE (XEXP (x, 2))) == nunits) + { + rtx top0 = simplify_merge_mask (XEXP (x, 0), mask, op); + rtx top1 = simplify_merge_mask (XEXP (x, 1), mask, op); + rtx top2 = simplify_merge_mask (XEXP (x, 2), mask, op); + if (top0 || top1) + return simplify_gen_ternary (GET_CODE (x), GET_MODE (x), + GET_MODE (XEXP (x, 0)), + top0 ? top0 : XEXP (x, 0), + top1 ? top1 : XEXP (x, 1), + top2 ? top2 : XEXP (x, 2)); + } + return NULL_RTX; +} + /* Simplify CODE, an operation with result mode MODE and three operands, OP0, OP1, and OP2. 
OP0_MODE was the mode of OP0 before it became
@@ -5672,6 +5731,28 @@ simplify_ternary_operation (enum rtx_code code, machine_mode mode,
 	  && !side_effects_p (op2)
 	  && !side_effects_p (op1))
 	return op0;
+
+      if (!side_effects_p (op2))
+	{
+	  rtx top0 = simplify_merge_mask (op0, op2, 0);
+	  rtx top1 = simplify_merge_mask (op1, op2, 1);
+	  if (top0 || top1)
+	    return simplify_gen_ternary (code, mode, mode,
+					 top0 ? top0 : op0,
+					 top1 ? top1 : op1, op2);
+	}
+
+      if (GET_CODE (op0) == VEC_MERGE
+	  && rtx_equal_p (op2, XEXP (op0, 2))
+	  && !side_effects_p (XEXP (op0, 1)) && !side_effects_p (op2))
+	return simplify_gen_ternary (code, mode, mode,
+				     XEXP (op0, 0), op1, op2);
+
+      if (GET_CODE (op1) == VEC_MERGE
+	  && rtx_equal_p (op2, XEXP (op1, 2))
+	  && !side_effects_p (XEXP (op1, 0)) && !side_effects_p (op2))
+	return simplify_gen_ternary (code, mode, mode,
+				     op0, XEXP (op1, 1), op2);
+
       break;
 
     default: