diff options
Diffstat (limited to 'gcc/brig/brigfrontend')
31 files changed, 8201 insertions, 0 deletions
diff --git a/gcc/brig/brigfrontend/brig-arg-block-handler.cc b/gcc/brig/brigfrontend/brig-arg-block-handler.cc new file mode 100644 index 00000000000..99945ee63e1 --- /dev/null +++ b/gcc/brig/brigfrontend/brig-arg-block-handler.cc @@ -0,0 +1,66 @@ +/* brig-arg-block-handler.cc -- brig arg block start/end directive handling + Copyright (C) 2016 Free Software Foundation, Inc. + Contributed by Pekka Jaaskelainen <pekka.jaaskelainen@parmance.com> + for General Processor Tech. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation; either version 3, or (at your option) any later + version. + + GCC is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. */ + +#include "brig-code-entry-handler.h" +#include "tree-iterator.h" +#include "system.h" +#include "errors.h" + +#include "tree-pretty-print.h" +#include "print-tree.h" + +size_t +brig_directive_arg_block_handler::operator () (const BrigBase *base) +{ + if (base->kind == BRIG_KIND_DIRECTIVE_ARG_BLOCK_START) + { + /* Initiate a new code block for the call site. */ + tree stmt_list = alloc_stmt_list (); + tree bind_expr + = build3 (BIND_EXPR, void_type_node, NULL, stmt_list, NULL); + tree block = make_node (BLOCK); + BIND_EXPR_BLOCK (bind_expr) = block; + static int block_id = 0; + BLOCK_NUMBER (block) = block_id++; + TREE_USED (block) = 1; + tree m_parentblock = DECL_INITIAL (m_parent.m_cf->m_func_decl); + BLOCK_SUPERCONTEXT (block) = m_parentblock; + + chainon (BLOCK_SUBBLOCKS (m_parentblock), block); + + m_parent.m_cf->m_current_bind_expr = bind_expr; + m_parent.m_cf->m_generating_arg_block = true; + } + else if (base->kind == BRIG_KIND_DIRECTIVE_ARG_BLOCK_END) + { + /* Restore the used bind expression back to the function + scope. */ + tree new_bind_expr = m_parent.m_cf->m_current_bind_expr; + m_parent.m_cf->m_current_bind_expr + = DECL_SAVED_TREE (m_parent.m_cf->m_func_decl); + m_parent.m_cf->append_statement (new_bind_expr); + m_parent.m_cf->m_generating_arg_block = false; + } + else + gcc_unreachable (); + + return base->byteCount; +} diff --git a/gcc/brig/brigfrontend/brig-atomic-inst-handler.cc b/gcc/brig/brigfrontend/brig-atomic-inst-handler.cc new file mode 100644 index 00000000000..87abab89b26 --- /dev/null +++ b/gcc/brig/brigfrontend/brig-atomic-inst-handler.cc @@ -0,0 +1,265 @@ +/* brig-atomic-inst-handler.cc -- brig atomic instruction handling + Copyright (C) 2016 Free Software Foundation, Inc. + + Contributed by Pekka Jaaskelainen <pekka.jaaskelainen@parmance.com> + for General Processor Tech. + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation; either version 3, or (at your option) any later + version. + + GCC is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. */ + +#include <sstream> + +#include "brig-code-entry-handler.h" +#include "brig-util.h" +#include "fold-const.h" +#include "diagnostic.h" +#include "tree-pretty-print.h" +#include "print-tree.h" +#include "convert.h" +#include "langhooks.h" +#include "gimple-expr.h" +#include "stringpool.h" +#include "brig-builtins.h" + +brig_atomic_inst_handler::brig_atomic_inst_handler (brig_to_generic &parent) + : brig_code_entry_handler (parent) +{ +} + +size_t +brig_atomic_inst_handler::generate_tree (const BrigInstBase &inst, + BrigAtomicOperation8_t atomic_opcode) +{ + tree_stl_vec operands = build_operands (inst); + const int first_input + = gccbrig_hsa_opcode_op_output_p (inst.opcode, 0) ? 1 : 0; + + tree instr_type = gccbrig_tree_type_for_hsa_type (inst.type); + + /* Utilize the atomic data types (from C++11 support) for implementing + atomic operations. */ + + tree atomic_type = build_qualified_type (instr_type, TYPE_QUAL_ATOMIC); + + gcc_assert (atomic_type != NULL_TREE); + + tree signal_handle = operands[first_input]; + tree atomic_ptype = build_pointer_type (atomic_type); + tree casted_to_ptr = convert_to_pointer (atomic_ptype, signal_handle); + + tree src0 = NULL_TREE; + if (atomic_opcode != BRIG_ATOMIC_LD) + src0 = operands[first_input + 1]; + + tree instr_expr = NULL_TREE; + + tree ptype = build_pointer_type (instr_type); + tree ptr = convert_to_pointer (ptype, operands[first_input]); + + if (atomic_opcode == BRIG_ATOMIC_ST) + { + tree mem_ref = build2 (MEM_REF, atomic_type, casted_to_ptr, + build_int_cst (atomic_ptype, 0)); + instr_expr = build2 (MODIFY_EXPR, atomic_type, mem_ref, src0); + } + else if (atomic_opcode == BRIG_ATOMIC_LD + || (atomic_opcode >= BRIG_ATOMIC_WAIT_EQ + && atomic_opcode <= BRIG_ATOMIC_WAITTIMEOUT_GTE)) + { + tree mem_ref = build2 (MEM_REF, atomic_type, casted_to_ptr, + build_int_cst (atomic_ptype, 0)); + /* signal_wait* instructions can return spuriously before the + condition becomes true. Therefore it's legal to return + right away. TODO: builtin calls which can be + implemented with a power efficient sleep-wait. */ + instr_expr = mem_ref; + } + else if (atomic_opcode == BRIG_ATOMIC_CAS) + { + /* Special case for CAS due to the two args. */ + tree built_in = NULL_TREE; + switch (gccbrig_hsa_type_bit_size (inst.type)) + { + case 32: + built_in + = builtin_decl_explicit (BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_4); + break; + case 64: + built_in + = builtin_decl_explicit (BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_8); + break; + default: + gcc_unreachable (); + } + + tree src1 = operands[first_input + 2]; + + tree src0_type + = TREE_VALUE (TREE_CHAIN (TYPE_ARG_TYPES (TREE_TYPE (built_in)))); + + tree src1_type = TREE_VALUE + (TREE_CHAIN (TREE_CHAIN (TYPE_ARG_TYPES (TREE_TYPE (built_in))))); + + instr_expr = call_builtin (built_in, 3, instr_type, ptype, ptr, + src0_type, src0, src1_type, src1); + } + else + { + tree built_in = NULL_TREE; + /* The rest of the builtins have the same number of parameters. + Generate a big if..else that finds the correct builtin + automagically from the def file. */ +#undef DEF_HSAIL_SAT_BUILTIN +#undef DEF_HSAIL_BUILTIN +#undef DEF_HSAIL_ATOMIC_BUILTIN +#undef DEF_HSAIL_INTR_BUILTIN +#undef DEF_HSAIL_CVT_ZEROI_SAT_BUILTIN + +#define DEF_HSAIL_ATOMIC_BUILTIN(ENUM, ATOMIC_OPCODE, HSAIL_TYPE, \ + NAME, TYPE, ATTRS) \ + if (atomic_opcode == ATOMIC_OPCODE && inst.type == HSAIL_TYPE) \ + built_in = builtin_decl_explicit (ENUM); \ + else +#include "brig-builtins.def" + switch (atomic_opcode) + { + case BRIG_ATOMIC_ADD: + switch (gccbrig_hsa_type_bit_size (inst.type)) + { + case 32: + built_in + = builtin_decl_explicit (BUILT_IN_SYNC_FETCH_AND_ADD_4); + break; + case 64: + built_in + = builtin_decl_explicit (BUILT_IN_SYNC_FETCH_AND_ADD_8); + break; + default: + gcc_unreachable (); + } + break; + case BRIG_ATOMIC_SUB: + switch (gccbrig_hsa_type_bit_size (inst.type)) + { + case 32: + built_in + = builtin_decl_explicit (BUILT_IN_SYNC_FETCH_AND_SUB_4); + break; + case 64: + built_in + = builtin_decl_explicit (BUILT_IN_SYNC_FETCH_AND_SUB_8); + break; + default: + gcc_unreachable (); + } + break; + case BRIG_ATOMIC_AND: + switch (gccbrig_hsa_type_bit_size (inst.type)) + { + case 32: + built_in + = builtin_decl_explicit (BUILT_IN_SYNC_FETCH_AND_AND_4); + break; + case 64: + built_in + = builtin_decl_explicit (BUILT_IN_SYNC_FETCH_AND_AND_8); + break; + default: + gcc_unreachable (); + } + break; + case BRIG_ATOMIC_XOR: + switch (gccbrig_hsa_type_bit_size (inst.type)) + { + case 32: + built_in + = builtin_decl_explicit (BUILT_IN_SYNC_FETCH_AND_XOR_4); + break; + case 64: + built_in + = builtin_decl_explicit (BUILT_IN_SYNC_FETCH_AND_XOR_8); + break; + default: + gcc_unreachable (); + } + break; + case BRIG_ATOMIC_OR: + switch (gccbrig_hsa_type_bit_size (inst.type)) + { + case 32: + built_in + = builtin_decl_explicit (BUILT_IN_SYNC_FETCH_AND_OR_4); + break; + case 64: + built_in + = builtin_decl_explicit (BUILT_IN_SYNC_FETCH_AND_OR_8); + break; + default: + gcc_unreachable (); + } + break; + case BRIG_ATOMIC_EXCH: + switch (gccbrig_hsa_type_bit_size (inst.type)) + { + case 32: + built_in + = builtin_decl_explicit (BUILT_IN_SYNC_LOCK_TEST_AND_SET_4); + break; + case 64: + built_in + = builtin_decl_explicit (BUILT_IN_SYNC_LOCK_TEST_AND_SET_8); + break; + default: + gcc_unreachable (); + } + break; + default: + gcc_unreachable (); + }; + + gcc_assert (built_in != NULL_TREE); + tree arg0_type + = TREE_VALUE (TREE_CHAIN (TYPE_ARG_TYPES (TREE_TYPE (built_in)))); + + instr_expr = call_builtin (built_in, 2, instr_type, ptr_type_node, + ptr, arg0_type, src0); + + /* We need a temp variable for the result, because otherwise + the gimplifier drops a necessary (unsigned to signed) cast in + the output assignment and fails a check later. */ + tree tmp_var = create_tmp_var (arg0_type, "builtin_out"); + tree tmp_assign + = build2 (MODIFY_EXPR, TREE_TYPE (tmp_var), tmp_var, instr_expr); + m_parent.m_cf->append_statement (tmp_assign); + instr_expr = tmp_var; + } + + if (first_input > 0) + build_output_assignment (inst, operands[0], instr_expr); + else + m_parent.m_cf->append_statement (instr_expr); + + return inst.base.byteCount; +} + +size_t +brig_atomic_inst_handler::operator () (const BrigBase *base) +{ + const BrigInstAtomic *inst = (const BrigInstAtomic *) base; + BrigAtomicOperation8_t atomic_opcode; + atomic_opcode = inst->atomicOperation; + + return generate_tree (inst->base, atomic_opcode); +} diff --git a/gcc/brig/brigfrontend/brig-basic-inst-handler.cc b/gcc/brig/brigfrontend/brig-basic-inst-handler.cc new file mode 100644 index 00000000000..638f818ef0b --- /dev/null +++ b/gcc/brig/brigfrontend/brig-basic-inst-handler.cc @@ -0,0 +1,865 @@ +/* brig-basic-inst-handler.cc -- brig basic instruction handling + Copyright (C) 2016 Free Software Foundation, Inc. + Contributed by Pekka Jaaskelainen <pekka.jaaskelainen@parmance.com> + for General Processor Tech. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation; either version 3, or (at your option) any later + version. + + GCC is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. */ + +#include <sstream> + +#include "brig-code-entry-handler.h" +#include "brig-util.h" + +#include "errors.h" +#include "gimple-expr.h" +#include "convert.h" +#include "print-tree.h" +#include "tree-pretty-print.h" +#include "langhooks.h" +#include "stor-layout.h" +#include "diagnostic-core.h" +#include "brig-builtins.h" + +brig_basic_inst_handler::brig_basic_inst_handler (brig_to_generic &parent) + : brig_code_entry_handler (parent) +{ +} + +class scalarized_sat_arithmetics : public tree_element_binary_visitor +{ +public: + scalarized_sat_arithmetics (const BrigInstBase &brig_inst) + : m_brig_inst (brig_inst) + { + BrigType16_t element_type = brig_inst.type & BRIG_TYPE_BASE_MASK; + +#undef DEF_HSAIL_SAT_BUILTIN +#undef DEF_HSAIL_BUILTIN +#undef DEF_HSAIL_ATOMIC_BUILTIN +#undef DEF_HSAIL_INTR_BUILTIN +#undef DEF_HSAIL_CVT_ZEROI_SAT_BUILTIN + +#define DEF_HSAIL_SAT_BUILTIN(ENUM, BRIG_OPCODE, HSAIL_TYPE, \ + NAME, TYPE, ATTRS) \ + if (brig_inst.opcode == BRIG_OPCODE && element_type == HSAIL_TYPE) \ + m_builtin = builtin_decl_explicit (ENUM); \ + else +#include "brig-builtins.def" + gcc_unreachable (); + } + + virtual tree + visit_element (brig_code_entry_handler &, tree operand0, tree operand1) + { + /* Implement saturating arithmetics with scalar built-ins for now. + TODO: emit GENERIC nodes for the simplest cases or at least + emit vector built-ins. */ + return call_builtin (m_builtin, 2, TREE_TYPE (operand0), + TREE_TYPE (operand0), operand0, + TREE_TYPE (operand1), operand1); + } + const BrigInstBase &m_brig_inst; + tree m_builtin; +}; + +/* Implements a vector shuffle. ARITH_TYPE is the type of the vector, + OPERANDS[0] is the first vector, OPERAND[1] the second vector and + OPERANDS[2] the shuffle mask in HSAIL format. The output is a VEC_PERM_EXPR + that implements the shuffle as a GENERIC expression. */ + +tree +brig_basic_inst_handler::build_shuffle (tree arith_type, + tree_stl_vec &operands) +{ + tree element_type + = get_unsigned_int_type (TREE_TYPE (TREE_TYPE (operands[0]))); + + /* Offsets to add to the mask values to convert from the + HSAIL mask to VEC_PERM_EXPR masks. VEC_PERM_EXPR mask + assumes an index spanning from 0 to 2 times the vec + width while HSAIL refers separately to two different + input vectors, thus is not a "full shuffle" where all + output elements can originate from any input element. */ + vec<constructor_elt, va_gc> *mask_offset_vals = NULL; + + vec<constructor_elt, va_gc> *input_mask_vals = NULL; + size_t input_mask_element_size + = exact_log2 (TYPE_VECTOR_SUBPARTS (arith_type)); + + /* Unpack the tightly packed mask elements to BIT_FIELD_REFs + from which to construct the mask vector as understood by + VEC_PERM_EXPR. */ + tree mask_operand = add_temp_var ("shuffle_mask", operands[2]); + + tree mask_element_type + = build_nonstandard_integer_type (input_mask_element_size, true); + + for (size_t i = 0; i < TYPE_VECTOR_SUBPARTS (arith_type); ++i) + { + tree mask_element + = build3 (BIT_FIELD_REF, mask_element_type, mask_operand, + build_int_cst (unsigned_char_type_node, + input_mask_element_size), + build_int_cst (unsigned_char_type_node, + i * input_mask_element_size)); + + mask_element = convert (element_type, mask_element); + + tree offset; + if (i < TYPE_VECTOR_SUBPARTS (arith_type) / 2) + offset = build_int_cst (element_type, 0); + else + offset + = build_int_cst (element_type, TYPE_VECTOR_SUBPARTS (arith_type)); + + CONSTRUCTOR_APPEND_ELT (mask_offset_vals, NULL_TREE, offset); + CONSTRUCTOR_APPEND_ELT (input_mask_vals, NULL_TREE, mask_element); + } + tree mask_vec_type + = build_vector_type (element_type, TYPE_VECTOR_SUBPARTS (arith_type)); + + tree mask_vec = build_constructor (mask_vec_type, input_mask_vals); + tree offset_vec = build_constructor (mask_vec_type, mask_offset_vals); + + tree mask = build2 (PLUS_EXPR, mask_vec_type, mask_vec, offset_vec); + + tree perm = build3 (VEC_PERM_EXPR, TREE_TYPE (operands[0]), operands[0], + operands[1], mask); + return perm; +} + +/* Unpacks (extracts) a scalar element with an index in OPERANDS[1] + from the vector expression in OPERANDS[0]. */ + +tree +brig_basic_inst_handler::build_unpack (tree_stl_vec &operands) +{ + /* Implement the unpack with a shuffle that stores the unpacked + element to the lowest bit positions in the dest. After that + a bitwise AND is used to clear the uppermost bits. */ + tree src_element_type = TREE_TYPE (TREE_TYPE (operands[0])); + + /* Perform the operations with a raw (unsigned int type) type. */ + tree element_type = get_unsigned_int_type (src_element_type); + + vec<constructor_elt, va_gc> *input_mask_vals = NULL; + vec<constructor_elt, va_gc> *and_mask_vals = NULL; + + size_t element_count = TYPE_VECTOR_SUBPARTS (TREE_TYPE (operands[0])); + tree vec_type = build_vector_type (element_type, element_count); + + for (size_t i = 0; i < element_count; ++i) + { + tree mask_element; + if (i == 0) + mask_element = convert (element_type, operands[1]); + else + mask_element = build_int_cst (element_type, 0); + + CONSTRUCTOR_APPEND_ELT (input_mask_vals, NULL_TREE, mask_element); + + tree and_mask_element; + if (i == 0) + and_mask_element = build_int_cst (element_type, -1); + else + and_mask_element = build_int_cst (element_type, 0); + CONSTRUCTOR_APPEND_ELT (and_mask_vals, NULL_TREE, and_mask_element); + } + + tree mask_vec = build_constructor (vec_type, input_mask_vals); + + tree and_mask_vec = build_constructor (vec_type, and_mask_vals); + + tree perm = build3 (VEC_PERM_EXPR, vec_type, + build_reinterpret_cast (vec_type, operands[0]), + build_reinterpret_cast (vec_type, operands[0]), mask_vec); + + tree cleared = build2 (BIT_AND_EXPR, vec_type, perm, and_mask_vec); + + size_t s = int_size_in_bytes (TREE_TYPE (cleared)) * BITS_PER_UNIT; + tree raw_type = build_nonstandard_integer_type (s, true); + + tree as_int = build_reinterpret_cast (raw_type, cleared); + + if (int_size_in_bytes (src_element_type) < 4) + { + if (INTEGRAL_TYPE_P (src_element_type)) + return extend_int (as_int, uint32_type_node, src_element_type); + } + return as_int; +} + +/* Packs (inserts) a scalar element in OPERANDS[1] + to the vector in OPERANDS[0] at element position defined by + OPERANDS[2]. */ + +tree +brig_basic_inst_handler::build_pack (tree_stl_vec &operands) +{ + /* Implement using a bit level insertion. + TODO: Reuse this for implementing 'bitinsert' + without a builtin call. */ + + size_t ecount = TYPE_VECTOR_SUBPARTS (TREE_TYPE (operands[0])); + size_t vecsize = int_size_in_bytes (TREE_TYPE (operands[0])) * BITS_PER_UNIT; + tree wide_type = build_nonstandard_integer_type (vecsize, 1); + + tree src_vect = build_reinterpret_cast (wide_type, operands[0]); + src_vect = add_temp_var ("src_vect", src_vect); + + tree scalar = operands[1]; + scalar = add_temp_var ("scalar", convert_to_integer (wide_type, scalar)); + + tree pos = operands[2]; + + /* The upper bits of the position can contain garbage. + Zero them for well-defined semantics. */ + tree t = build2 (BIT_AND_EXPR, TREE_TYPE (pos), operands[2], + build_int_cstu (TREE_TYPE (pos), ecount - 1)); + pos = add_temp_var ("pos", convert (wide_type, t)); + + tree element_type = TREE_TYPE (TREE_TYPE (operands[0])); + size_t element_width = int_size_in_bytes (element_type) * BITS_PER_UNIT; + tree ewidth = build_int_cstu (wide_type, element_width); + + tree bitoffset = build2 (MULT_EXPR, wide_type, ewidth, pos); + bitoffset = add_temp_var ("offset", bitoffset); + + uint64_t mask_int + = element_width == 64 ? (uint64_t) -1 : ((uint64_t) 1 << element_width) - 1; + + tree mask = build_int_cstu (wide_type, mask_int); + + mask = add_temp_var ("mask", convert_to_integer (wide_type, mask)); + + tree clearing_mask + = build1 (BIT_NOT_EXPR, wide_type, + build2 (LSHIFT_EXPR, wide_type, mask, bitoffset)); + + tree zeroed_element + = build2 (BIT_AND_EXPR, wide_type, src_vect, clearing_mask); + + /* TODO: Is the AND necessary: does HSA define what + happens if the upper bits in the inserted element are not + zero? */ + tree element_in_position + = build2 (LSHIFT_EXPR, wide_type, + build2 (BIT_AND_EXPR, wide_type, scalar, mask), bitoffset); + + tree inserted + = build2 (BIT_IOR_EXPR, wide_type, zeroed_element, element_in_position); + return inserted; +} + +/* Implement the unpack{lo,hi}. BRIG_OPCODE should tell which one and + ARITH_TYPE describe the type of the vector arithmetics. + OPERANDS[0] and OPERANDS[1] are the input vectors. */ + +tree +brig_basic_inst_handler::build_unpack_lo_or_hi (BrigOpcode16_t brig_opcode, + tree arith_type, + tree_stl_vec &operands) +{ + tree element_type = get_unsigned_int_type (TREE_TYPE (arith_type)); + tree mask_vec_type + = build_vector_type (element_type, TYPE_VECTOR_SUBPARTS (arith_type)); + + size_t element_count = TYPE_VECTOR_SUBPARTS (arith_type); + vec<constructor_elt, va_gc> *input_mask_vals = NULL; + + size_t offset = (brig_opcode == BRIG_OPCODE_UNPACKLO) ? 0 : element_count / 2; + + for (size_t i = 0; i < element_count / 2; ++i) + { + CONSTRUCTOR_APPEND_ELT (input_mask_vals, NULL_TREE, + build_int_cst (element_type, offset + i)); + CONSTRUCTOR_APPEND_ELT (input_mask_vals, NULL_TREE, + build_int_cst (element_type, + offset + i + element_count)); + } + + tree mask_vec = build_constructor (mask_vec_type, input_mask_vals); + + tree perm = build3 (VEC_PERM_EXPR, TREE_TYPE (operands[0]), operands[0], + operands[1], mask_vec); + return perm; +} + +/* Builds a basic instruction expression from a BRIG instruction. BRIG_OPCODE + is the opcode, BRIG_TYPE the brig type of the instruction, ARITH_TYPE the + desired tree type for the instruction, and OPERANDS the instruction's + input operands already converted to tree nodes. */ + +tree +brig_basic_inst_handler::build_inst_expr (BrigOpcode16_t brig_opcode, + BrigType16_t brig_type, + tree arith_type, + tree_stl_vec &operands) +{ + tree_code opcode = get_tree_code_for_hsa_opcode (brig_opcode, brig_type); + + BrigType16_t inner_type = brig_type & BRIG_TYPE_BASE_MASK; + + tree instr_inner_type + = VECTOR_TYPE_P (arith_type) ? TREE_TYPE (arith_type) : arith_type; + + if (opcode == RSHIFT_EXPR || opcode == LSHIFT_EXPR) + { + /* HSA defines modulo/clipping behavior for shift amounts larger + than the bit width, while tree.def leaves it undefined. + We need to mask the upper bits to ensure the defined behavior. */ + tree scalar_mask + = build_int_cst (instr_inner_type, + gccbrig_hsa_type_bit_size (inner_type) - 1); + + tree mask = VECTOR_TYPE_P (arith_type) + ? build_vector_from_val (arith_type, scalar_mask) + : scalar_mask; + + /* The shift amount is a scalar, broadcast it to produce + a vector shift. */ + if (VECTOR_TYPE_P (arith_type)) + operands[1] = build_vector_from_val (arith_type, operands[1]); + operands[1] = build2 (BIT_AND_EXPR, arith_type, operands[1], mask); + } + + size_t input_count = operands.size (); + size_t output_count = gccbrig_hsa_opcode_op_output_p (brig_opcode, 0) ? + 1 : 0; + + if (opcode == TREE_LIST) + { + /* There was no direct GENERIC opcode for the instruction; + try to emulate it with a chain of GENERIC nodes. */ + if (brig_opcode == BRIG_OPCODE_MAD || brig_opcode == BRIG_OPCODE_MAD24) + { + /* There doesn't seem to be a "standard" MAD built-in in gcc so let's + use a chain of multiply + add for now (double rounding method). + It should be easier for optimizers than a custom built-in call + WIDEN_MULT_EXPR is close, but requires a double size result + type. */ + tree mult_res + = build2 (MULT_EXPR, arith_type, operands[0], operands[1]); + return build2 (PLUS_EXPR, arith_type, mult_res, operands[2]); + } + else if (brig_opcode == BRIG_OPCODE_MAD24HI) + { + tree mult_res + = build2 (MULT_HIGHPART_EXPR, arith_type, operands[0], operands[1]); + return build2 (PLUS_EXPR, arith_type, mult_res, operands[2]); + } + else if (brig_opcode == BRIG_OPCODE_SHUFFLE) + { + return build_shuffle (arith_type, operands); + } + else if (brig_opcode == BRIG_OPCODE_UNPACKLO + || brig_opcode == BRIG_OPCODE_UNPACKHI) + { + return build_unpack_lo_or_hi (brig_opcode, arith_type, operands); + } + else if (brig_opcode == BRIG_OPCODE_UNPACK) + { + return build_unpack (operands); + } + else if (brig_opcode == BRIG_OPCODE_PACK) + { + return build_pack (operands); + } + else if (brig_opcode == BRIG_OPCODE_NRSQRT) + { + /* Implement as 1.0/sqrt (x) and assume gcc instruction selects to + native ISA other than a division, if available. + TODO: this will happen only with unsafe math optimizations + on which cannot be used in general to remain HSAIL compliant. + Perhaps a builtin call would be better option here. */ + return build2 (RDIV_EXPR, arith_type, build_one_cst (arith_type), + expand_or_call_builtin (BRIG_OPCODE_SQRT, brig_type, + arith_type, operands)); + } + else if (brig_opcode == BRIG_OPCODE_NRCP) + { + /* Implement as 1.0/x and assume gcc instruction selects to + native ISA other than a division, if available. */ + return build2 (RDIV_EXPR, arith_type, build_one_cst (arith_type), + operands[0]); + } + else if (brig_opcode == BRIG_OPCODE_LANEID + || brig_opcode == BRIG_OPCODE_MAXWAVEID + || brig_opcode == BRIG_OPCODE_WAVEID) + { + /* Assuming WAVESIZE 1 (for now), therefore LANEID, WAVEID and + MAXWAVEID always return 0. */ + return build_zero_cst (arith_type); + } + else + gcc_unreachable (); + } + else if (opcode == CALL_EXPR) + return expand_or_call_builtin (brig_opcode, brig_type, arith_type, + operands); + else if (output_count == 1) + { + if (input_count == 1) + { + if (opcode == MODIFY_EXPR) + return operands[0]; + else + return build1 (opcode, arith_type, operands[0]); + } + else if (input_count == 2) + return build2 (opcode, arith_type, operands[0], operands[1]); + else if (input_count == 3) + return build3 (opcode, arith_type, operands[0], operands[1], + operands[2]); + else + gcc_unreachable (); + } + else + gcc_unreachable (); + + return NULL_TREE; +} + +/* Handles the basic instructions, including packed instructions. Deals + with the different packing modes by unpacking/packing the wanted + elements. Delegates most of the instruction cases to build_inst_expr(). */ + +size_t +brig_basic_inst_handler::operator () (const BrigBase *base) +{ + const BrigInstBase *brig_inst = (const BrigInstBase *) base; + + tree_stl_vec operands = build_operands (*brig_inst); + + size_t output_count + = gccbrig_hsa_opcode_op_output_p (brig_inst->opcode, 0) ? 1 : 0; + size_t input_count + = operands.size () == 0 ? 0 : (operands.size () - output_count); + + gcc_assert (output_count == 0 || output_count == 1); + + tree_stl_vec::iterator first_input_i = operands.begin (); + if (output_count > 0 && operands.size () > 0) + ++first_input_i; + + tree_stl_vec in_operands; + in_operands.assign (first_input_i, operands.end ()); + + BrigType16_t brig_inst_type = brig_inst->type; + + if (brig_inst->opcode == BRIG_OPCODE_NOP) + return base->byteCount; + else if (brig_inst->opcode == BRIG_OPCODE_FIRSTBIT + || brig_inst->opcode == BRIG_OPCODE_LASTBIT + || brig_inst->opcode == BRIG_OPCODE_SAD) + /* These instructions are reported to be always 32b in HSAIL, but we want + to treat them according to their input argument's type to select the + correct instruction/builtin. */ + brig_inst_type + = gccbrig_tree_type_to_hsa_type (TREE_TYPE (in_operands[0])); + + tree instr_type = gccbrig_tree_type_for_hsa_type (brig_inst_type); + + if (!instr_type) + { + gcc_unreachable (); + return base->byteCount; + } + + bool is_vec_instr = hsa_type_packed_p (brig_inst_type); + + size_t element_size_bits; + size_t element_count; + + if (is_vec_instr) + { + BrigType16_t brig_element_type = brig_inst_type & BRIG_TYPE_BASE_MASK; + element_size_bits = gccbrig_hsa_type_bit_size (brig_element_type); + element_count = gccbrig_hsa_type_bit_size (brig_inst_type) + / gccbrig_hsa_type_bit_size (brig_element_type); + } + else + { + element_size_bits = gccbrig_hsa_type_bit_size (brig_inst_type); + element_count = 1; + } + + /* The actual arithmetics type that should be performed with the + operation. This is not always the same as the original BRIG + opcode's type due to implicit conversions of storage-only f16. */ + tree arith_type = gccbrig_is_bit_operation (brig_inst->opcode) + ? gccbrig_tree_type_for_hsa_type (brig_inst_type) + : get_tree_expr_type_for_hsa_type (brig_inst_type); + + tree instr_expr = NULL_TREE; + + BrigPack8_t p = BRIG_PACK_NONE; + if (brig_inst->base.kind == BRIG_KIND_INST_MOD) + p = ((const BrigInstMod *) brig_inst)->pack; + else if (brig_inst->base.kind == BRIG_KIND_INST_CMP) + p = ((const BrigInstCmp *) brig_inst)->pack; + + if (p == BRIG_PACK_PS || p == BRIG_PACK_PSSAT) + in_operands[1] = build_lower_element_broadcast (in_operands[1]); + else if (p == BRIG_PACK_SP || p == BRIG_PACK_SPSAT) + in_operands[0] = build_lower_element_broadcast (in_operands[0]); + + tree_code opcode + = get_tree_code_for_hsa_opcode (brig_inst->opcode, brig_inst_type); + + if (p >= BRIG_PACK_PPSAT && p <= BRIG_PACK_PSAT) + { + scalarized_sat_arithmetics sat_arith (*brig_inst); + gcc_assert (input_count == 2); + instr_expr = sat_arith (*this, in_operands[0], in_operands[1]); + } + else if (opcode == RETURN_EXPR) + { + if (m_parent.m_cf->m_is_kernel) + { + tree goto_stmt + = build1 (GOTO_EXPR, void_type_node, m_parent.m_cf->m_exit_label); + m_parent.m_cf->append_statement (goto_stmt); + return base->byteCount; + } + else + { + m_parent.m_cf->append_return_stmt (); + return base->byteCount; + } + } + else if (opcode == MULT_HIGHPART_EXPR && + is_vec_instr && element_size_bits < 64) + { + /* MULT_HIGHPART_EXPR works only on target dependent vector sizes and + even the scalars do not seem to work at least for char elements. + + Let's fall back to scalarization and promotion of the vector elements + to larger types with the MULHI computed as a regular MUL. + MULHI for 2x64b seems to work with the Intel CPUs I've tested so + that is passed on for vector processing so there is no need for + 128b scalar arithmetics. + + This is not modular as these type of things do not belong to the + frontend, there should be a legalization phase before the backend + that figures out the best way to compute the MULHI for any + integer vector datatype. + + TODO: promote to larger vector types instead. For example + MULT_HIGHPART_EXPR with s8x8 doesn't work, but s16x8 seems to at least + with my x86-64. + */ + tree_stl_vec operand0_elements; + if (input_count > 0) + unpack (in_operands[0], operand0_elements); + + tree_stl_vec operand1_elements; + if (input_count > 1) + unpack (in_operands[1], operand1_elements); + + tree_stl_vec result_elements; + + tree scalar_type = TREE_TYPE (arith_type); + BrigType16_t element_type = brig_inst_type & BRIG_TYPE_BASE_MASK; + tree promoted_type = short_integer_type_node; + switch (element_type) + { + case BRIG_TYPE_S8: + promoted_type = gccbrig_tree_type_for_hsa_type (BRIG_TYPE_S16); + break; + case BRIG_TYPE_U8: + promoted_type = gccbrig_tree_type_for_hsa_type (BRIG_TYPE_U16); + break; + case BRIG_TYPE_S16: + promoted_type = gccbrig_tree_type_for_hsa_type (BRIG_TYPE_S32); + break; + case BRIG_TYPE_U16: + promoted_type = gccbrig_tree_type_for_hsa_type (BRIG_TYPE_U32); + break; + case BRIG_TYPE_S32: + promoted_type = gccbrig_tree_type_for_hsa_type (BRIG_TYPE_S64); + break; + case BRIG_TYPE_U32: + promoted_type = gccbrig_tree_type_for_hsa_type (BRIG_TYPE_U64); + break; + default: + gcc_unreachable (); + } + + size_t promoted_type_size = int_size_in_bytes (promoted_type) * 8; + + for (size_t i = 0; i < TYPE_VECTOR_SUBPARTS (arith_type); ++i) + { + tree operand0 = convert (promoted_type, operand0_elements.at (i)); + tree operand1 = convert (promoted_type, operand1_elements.at (i)); + + tree scalar_expr + = build2 (MULT_EXPR, promoted_type, operand0, operand1); + + scalar_expr + = build2 (RSHIFT_EXPR, promoted_type, scalar_expr, + build_int_cstu (promoted_type, promoted_type_size / 2)); + + result_elements.push_back (convert (scalar_type, scalar_expr)); + } + instr_expr = pack (result_elements); + } + else + { + /* 'class' is always of b1 type, let's consider it by its + float type when building the instruction to find the + correct builtin. */ + if (brig_inst->opcode == BRIG_OPCODE_CLASS) + brig_inst_type = ((const BrigInstSourceType *) base)->sourceType; + instr_expr = build_inst_expr (brig_inst->opcode, brig_inst_type, + arith_type, in_operands); + } + + if (instr_expr == NULL_TREE) + { + gcc_unreachable (); + return base->byteCount; + } + + if (p == BRIG_PACK_SS || p == BRIG_PACK_S || p == BRIG_PACK_SSSAT + || p == BRIG_PACK_SSAT) + { + /* In case of _s_ or _ss_, select only the lowest element + from the new input to the output. We could extract + the element and use a scalar operation, but try + to keep data in vector registers as much as possible + to avoid copies between scalar and vector datapaths. */ + tree old_value; + tree half_storage_type = gccbrig_tree_type_for_hsa_type (brig_inst_type); + bool is_fp16_operation + = (brig_inst_type & BRIG_TYPE_BASE_MASK) == BRIG_TYPE_F16 + && !gccbrig_is_bit_operation (brig_inst->opcode); + + if (is_fp16_operation) + old_value = build_h2f_conversion + (build_reinterpret_cast (half_storage_type, operands[0])); + else + old_value + = build_reinterpret_cast (TREE_TYPE (instr_expr), operands[0]); + + size_t esize = is_fp16_operation ? 32 : element_size_bits; + + /* Construct a permutation mask where other elements than the lowest one + is picked from the old_value. */ + tree mask_inner_type = build_nonstandard_integer_type (esize, 1); + vec<constructor_elt, va_gc> *constructor_vals = NULL; + for (size_t i = 0; i < element_count; ++i) + { + tree cst; + + if (i == 0) + cst = build_int_cstu (mask_inner_type, element_count); + else + cst = build_int_cstu (mask_inner_type, i); + CONSTRUCTOR_APPEND_ELT (constructor_vals, NULL_TREE, cst); + } + tree mask_vec_type = build_vector_type (mask_inner_type, element_count); + tree mask = build_vector_from_ctor (mask_vec_type, constructor_vals); + + tree new_value = create_tmp_var (TREE_TYPE (instr_expr), "new_output"); + tree assign + = build2 (MODIFY_EXPR, TREE_TYPE (instr_expr), new_value, instr_expr); + m_parent.m_cf->append_statement (assign); + + instr_expr + = build3 (VEC_PERM_EXPR, arith_type, old_value, new_value, mask); + + tree lower_output = create_tmp_var (TREE_TYPE (instr_expr), "s_output"); + tree assign_lower = build2 (MODIFY_EXPR, TREE_TYPE (instr_expr), + lower_output, instr_expr); + m_parent.m_cf->append_statement (assign_lower); + instr_expr = lower_output; + } + + if (output_count == 1) + build_output_assignment (*brig_inst, operands[0], instr_expr); + else + m_parent.m_cf->append_statement (instr_expr); + return base->byteCount; +} + +/* Create an expression that broadcasts the lowest element of the + vector in VEC_OPERAND to all elements of the returned vector. */ + +tree +brig_basic_inst_handler::build_lower_element_broadcast (tree vec_operand) +{ + /* Build the broadcast using shuffle because there's no + direct broadcast in GENERIC and this way there's no need for + a separate extract of the lowest element. */ + tree element_type = TREE_TYPE (TREE_TYPE (vec_operand)); + size_t esize = 8 * int_size_in_bytes (element_type); + + size_t element_count = TYPE_VECTOR_SUBPARTS (TREE_TYPE (vec_operand)); + tree mask_inner_type = build_nonstandard_integer_type (esize, 1); + vec<constructor_elt, va_gc> *constructor_vals = NULL; + + /* Construct the mask. */ + for (size_t i = 0; i < element_count; ++i) + { + tree cst = build_int_cstu (mask_inner_type, element_count); + CONSTRUCTOR_APPEND_ELT (constructor_vals, NULL_TREE, cst); + } + tree mask_vec_type = build_vector_type (mask_inner_type, element_count); + tree mask = build_vector_from_ctor (mask_vec_type, constructor_vals); + + return build3 (VEC_PERM_EXPR, TREE_TYPE (vec_operand), vec_operand, + vec_operand, mask); +} + +/* Returns the tree code that should be used to implement the given + HSA instruction opcode (BRIG_OPCODE) for the given type of instruction + (BRIG_TYPE). In case the opcode cannot be mapped to a TREE node directly, + returns TREE_LIST (if it can be emulated with a simple chain of tree + nodes) or CALL_EXPR if the opcode should be implemented using a builtin + call. */ + +tree_code +brig_basic_inst_handler::get_tree_code_for_hsa_opcode + (BrigOpcode16_t brig_opcode, BrigType16_t brig_type) const +{ + BrigType16_t brig_inner_type = brig_type & BRIG_TYPE_BASE_MASK; + switch (brig_opcode) + { + case BRIG_OPCODE_NOP: + return NOP_EXPR; + case BRIG_OPCODE_ADD: + return PLUS_EXPR; + case BRIG_OPCODE_CMOV: + if (brig_inner_type == brig_type) + return COND_EXPR; + else + return VEC_COND_EXPR; + case BRIG_OPCODE_SUB: + return MINUS_EXPR; + case BRIG_OPCODE_MUL: + case BRIG_OPCODE_MUL24: + return MULT_EXPR; + case BRIG_OPCODE_MULHI: + case BRIG_OPCODE_MUL24HI: + return MULT_HIGHPART_EXPR; + case BRIG_OPCODE_DIV: + if (gccbrig_is_float_type (brig_inner_type)) + return RDIV_EXPR; + else + return TRUNC_DIV_EXPR; + case BRIG_OPCODE_NEG: + return NEGATE_EXPR; + case BRIG_OPCODE_MIN: + if (gccbrig_is_float_type (brig_inner_type)) + return CALL_EXPR; + else + return MIN_EXPR; + case BRIG_OPCODE_MAX: + if (gccbrig_is_float_type (brig_inner_type)) + return CALL_EXPR; + else + return MAX_EXPR; + case BRIG_OPCODE_FMA: + return FMA_EXPR; + case BRIG_OPCODE_ABS: + return ABS_EXPR; + case BRIG_OPCODE_SHL: + return LSHIFT_EXPR; + case BRIG_OPCODE_SHR: + return RSHIFT_EXPR; + case BRIG_OPCODE_OR: + return BIT_IOR_EXPR; + case BRIG_OPCODE_XOR: + return BIT_XOR_EXPR; + case BRIG_OPCODE_AND: + return BIT_AND_EXPR; + case BRIG_OPCODE_NOT: + return BIT_NOT_EXPR; + case BRIG_OPCODE_RET: + return RETURN_EXPR; + case BRIG_OPCODE_MOV: + case BRIG_OPCODE_LDF: + return MODIFY_EXPR; + case BRIG_OPCODE_LD: + case BRIG_OPCODE_ST: + return MEM_REF; + case BRIG_OPCODE_BR: + return GOTO_EXPR; + case BRIG_OPCODE_REM: + if (brig_type == BRIG_TYPE_U64 || brig_type == BRIG_TYPE_U32) + return TRUNC_MOD_EXPR; + else + return CALL_EXPR; + case BRIG_OPCODE_NRCP: + case BRIG_OPCODE_NRSQRT: + /* Implement as 1/f (x). gcc should pattern detect that and + use a native instruction, if available, for it. */ + return TREE_LIST; + case BRIG_OPCODE_FLOOR: + case BRIG_OPCODE_CEIL: + case BRIG_OPCODE_SQRT: + case BRIG_OPCODE_NSQRT: + case BRIG_OPCODE_RINT: + case BRIG_OPCODE_TRUNC: + case BRIG_OPCODE_POPCOUNT: + case BRIG_OPCODE_COPYSIGN: + case BRIG_OPCODE_NCOS: + case BRIG_OPCODE_NSIN: + case BRIG_OPCODE_NLOG2: + case BRIG_OPCODE_NEXP2: + case BRIG_OPCODE_NFMA: + /* Class has type B1 regardless of the float type, thus + the below builtin map search cannot find it. */ + case BRIG_OPCODE_CLASS: + case BRIG_OPCODE_WORKITEMABSID: + return CALL_EXPR; + default: + + /* Some BRIG opcodes can use the same builtins for unsigned and + signed types. Force these cases to unsigned types. + */ + + if (brig_opcode == BRIG_OPCODE_BORROW + || brig_opcode == BRIG_OPCODE_CARRY + || brig_opcode == BRIG_OPCODE_LASTBIT + || brig_opcode == BRIG_OPCODE_BITINSERT) + { + if (brig_type == BRIG_TYPE_S32) + brig_type = BRIG_TYPE_U32; + else if (brig_type == BRIG_TYPE_S64) + brig_type = BRIG_TYPE_U64; + } + + + builtin_map::const_iterator i + = s_custom_builtins.find (std::make_pair (brig_opcode, brig_type)); + if (i != s_custom_builtins.end ()) + return CALL_EXPR; + else if (s_custom_builtins.find + (std::make_pair (brig_opcode, brig_inner_type)) + != s_custom_builtins.end ()) + return CALL_EXPR; + if (brig_inner_type == BRIG_TYPE_F16 + && s_custom_builtins.find + (std::make_pair (brig_opcode, BRIG_TYPE_F32)) + != s_custom_builtins.end ()) + return CALL_EXPR; + break; + } + return TREE_LIST; /* Emulate using a chain of nodes. */ +} diff --git a/gcc/brig/brigfrontend/brig-branch-inst-handler.cc b/gcc/brig/brigfrontend/brig-branch-inst-handler.cc new file mode 100644 index 00000000000..040ae5d3586 --- /dev/null +++ b/gcc/brig/brigfrontend/brig-branch-inst-handler.cc @@ -0,0 +1,221 @@ +/* brig-branch-inst-handler.cc -- brig branch instruction handling + Copyright (C) 2016 Free Software Foundation, Inc. + Contributed by Pekka Jaaskelainen <pekka.jaaskelainen@parmance.com> + for General Processor Tech. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation; either version 3, or (at your option) any later + version. + + GCC is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. */ + +#include "brig-code-entry-handler.h" + +#include "errors.h" +#include "brig-util.h" +#include "tree-pretty-print.h" +#include "print-tree.h" +#include "vec.h" +#include "fold-const.h" + +size_t +brig_branch_inst_handler::operator () (const BrigBase *base) +{ + const BrigInstBase *brig_inst + = (const BrigInstBase *) &((const BrigInstBasic *) base)->base; + + if (brig_inst->opcode == BRIG_OPCODE_CALL) + { + const BrigData *operand_entries + = m_parent.get_brig_data_entry (brig_inst->operands); + tree func_ref = NULL_TREE; + vec<tree, va_gc> *out_args; + vec_alloc (out_args, 1); + vec<tree, va_gc> *in_args; + vec_alloc (in_args, 4); + + size_t operand_count = operand_entries->byteCount / 4; + gcc_assert (operand_count < 4); + + for (size_t i = 0; i < operand_count; ++i) + { + uint32_t operand_offset + = ((const uint32_t *) &operand_entries->bytes)[i]; + const BrigBase *operand_data + = m_parent.get_brig_operand_entry (operand_offset); + if (i == 1) + { + gcc_assert (operand_data->kind == BRIG_KIND_OPERAND_CODE_REF); + func_ref = build_tree_operand (*brig_inst, *operand_data); + continue; + } + gcc_assert (operand_data->kind == BRIG_KIND_OPERAND_CODE_LIST); + const BrigOperandCodeList *codelist + = (const BrigOperandCodeList *) operand_data; + const BrigData *data + = m_parent.get_brig_data_entry (codelist->elements); + + size_t bytes = data->byteCount; + const BrigOperandOffset32_t *operand_ptr + = (const BrigOperandOffset32_t *) data->bytes; + + vec<tree, va_gc> *args = i == 0 ? out_args : in_args; + + while (bytes > 0) + { + BrigOperandOffset32_t offset = *operand_ptr; + const BrigBase *code_element + = m_parent.get_brig_code_entry (offset); + gcc_assert (code_element->kind == BRIG_KIND_DIRECTIVE_VARIABLE); + const BrigDirectiveVariable *brig_var + = (const BrigDirectiveVariable *) code_element; + tree var = m_parent.m_cf->arg_variable (brig_var); + + if (brig_var->type & BRIG_TYPE_ARRAY) + { + /* Array return values are passed as the first argument. */ + args = in_args; + /* Pass pointer to the element zero and use its element zero + as the base address. */ + tree etype = TREE_TYPE (TREE_TYPE (var)); + tree ptype = build_pointer_type (etype); + tree element_zero + = build4 (ARRAY_REF, etype, var, integer_zero_node, + NULL_TREE, NULL_TREE); + var = build1 (ADDR_EXPR, ptype, element_zero); + } + + gcc_assert (var != NULL_TREE); + vec_safe_push (args, var); + ++operand_ptr; + bytes -= 4; + } + } + + gcc_assert (func_ref != NULL_TREE); + gcc_assert (out_args->length () == 0 || out_args->length () == 1); + + tree ret_val_type = void_type_node; + tree ret_val = NULL_TREE; + if (out_args->length () == 1) + { + ret_val = (*out_args)[0]; + ret_val_type = TREE_TYPE (ret_val); + } + + /* Pass the hidden kernel arguments along to the called functions as + they might call builtins that need them or access group/private + memory. */ + + vec_safe_push (in_args, m_parent.m_cf->m_context_arg); + vec_safe_push (in_args, m_parent.m_cf->m_group_base_arg); + vec_safe_push (in_args, m_parent.m_cf->m_private_base_arg); + + tree call = build_call_vec (ret_val_type, build_fold_addr_expr (func_ref), + in_args); + TREE_NOTHROW (func_ref) = 1; + TREE_NOTHROW (call) = 1; + + if (ret_val != NULL_TREE) + { + TREE_ADDRESSABLE (ret_val) = 1; + tree result_assign + = build2 (MODIFY_EXPR, TREE_TYPE (ret_val), ret_val, call); + m_parent.m_cf->append_statement (result_assign); + } + else + { + m_parent.m_cf->append_statement (call); + } + + m_parent.m_cf->m_has_unexpanded_dp_builtins = false; + m_parent.m_cf->m_called_functions.push_back (func_ref); + + return base->byteCount; + } + + tree instr_type = gccbrig_tree_type_for_hsa_type (brig_inst->type); + tree_stl_vec operands = build_operands (*brig_inst); + + if (brig_inst->opcode == BRIG_OPCODE_BR) + { + tree goto_stmt = build1 (GOTO_EXPR, instr_type, operands[0]); + m_parent.m_cf->append_statement (goto_stmt); + } + else if (brig_inst->opcode == BRIG_OPCODE_SBR) + { + tree select = operands[0]; + tree cases = operands[1]; + + tree switch_expr = build3 (SWITCH_EXPR, TREE_TYPE (select), select, + NULL_TREE, NULL_TREE); + + tree default_case + = build_case_label (NULL_TREE, NULL_TREE, + create_artificial_label (UNKNOWN_LOCATION)); + append_to_statement_list (default_case, &SWITCH_BODY (switch_expr)); + + tree default_jump + = build1 (GOTO_EXPR, void_type_node, TREE_VEC_ELT (cases, 0)); + append_to_statement_list (default_jump, &SWITCH_BODY (switch_expr)); + + for (int c = 0; c < TREE_VEC_LENGTH (cases); ++c) + { + tree case_label + = build_case_label (build_int_cst (integer_type_node, c), NULL_TREE, + create_artificial_label (UNKNOWN_LOCATION)); + + append_to_statement_list (case_label, &SWITCH_BODY (switch_expr)); + + tree jump + = build1 (GOTO_EXPR, void_type_node, TREE_VEC_ELT (cases, c)); + append_to_statement_list (jump, &SWITCH_BODY (switch_expr)); + } + m_parent.m_cf->append_statement (switch_expr); + } + else if (brig_inst->opcode == BRIG_OPCODE_CBR) + { + tree condition = operands[0]; + tree target_goto = build1 (GOTO_EXPR, void_type_node, operands[1]); + /* Represents the if..else as (condition)?(goto foo):(goto bar). */ + tree if_stmt + = build3 (COND_EXPR, void_type_node, condition, target_goto, NULL_TREE); + m_parent.m_cf->append_statement (if_stmt); + } + else if (brig_inst->opcode == BRIG_OPCODE_WAVEBARRIER) + { + /* WAVEBARRIER is a NOP when WAVESIZE = 1. */ + } + else if (brig_inst->opcode == BRIG_OPCODE_BARRIER) + { + m_parent.m_cf->m_has_barriers = true; + tree_stl_vec call_operands; + /* FIXME. We should add attributes (are there suitable ones in gcc?) that + ensure the barrier won't be duplicated or moved out of loops etc. + Like the 'noduplicate' of LLVM. Same goes for fbarriers. */ + m_parent.m_cf->append_statement + (expand_or_call_builtin (brig_inst->opcode, BRIG_TYPE_NONE, NULL_TREE, + call_operands)); + } + else if (brig_inst->opcode >= BRIG_OPCODE_ARRIVEFBAR + && brig_inst->opcode <= BRIG_OPCODE_WAITFBAR) + { + m_parent.m_cf->m_has_barriers = true; + m_parent.m_cf->append_statement + (expand_or_call_builtin (brig_inst->opcode, BRIG_TYPE_NONE, + uint32_type_node, operands)); + } + else + gcc_unreachable (); + return base->byteCount; +} diff --git a/gcc/brig/brigfrontend/brig-cmp-inst-handler.cc b/gcc/brig/brigfrontend/brig-cmp-inst-handler.cc new file mode 100644 index 00000000000..f455d1ec44e --- /dev/null +++ b/gcc/brig/brigfrontend/brig-cmp-inst-handler.cc @@ -0,0 +1,198 @@ +/* brig-cmp-inst-handler.cc -- brig cmp instruction handling + Copyright (C) 2016 Free Software Foundation, Inc. + Contributed by Pekka Jaaskelainen <pekka.jaaskelainen@parmance.com> + for General Processor Tech. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +#include "brig-code-entry-handler.h" +#include "diagnostic.h" +#include "tree-pretty-print.h" +#include "print-tree.h" +#include "brig-util.h" +#include "convert.h" + +size_t +brig_cmp_inst_handler::operator () (const BrigBase *base) +{ + const BrigInstBase *inst_base = (const BrigInstBase *) base; + const BrigInstCmp *inst = (const BrigInstCmp *) base; + + tree cmp_type = get_tree_expr_type_for_hsa_type (inst->sourceType); + + /* The destination type to convert the comparison result to. */ + tree dest_type = gccbrig_tree_type_for_hsa_type (inst_base->type); + + const bool is_fp16_dest + = (inst_base->type & BRIG_TYPE_BASE_MASK) == BRIG_TYPE_F16; + const bool is_boolean_dest + = (inst_base->type & BRIG_TYPE_BASE_MASK) == BRIG_TYPE_B1; + + bool is_int_cmp = VECTOR_TYPE_P (cmp_type) + ? INTEGRAL_TYPE_P (TREE_TYPE (cmp_type)) + : INTEGRAL_TYPE_P (cmp_type); + + /* The type for the GENERIC comparison. It should match the + input operand width for vector comparisons, a boolean + otherwise. */ + tree result_type = get_comparison_result_type (cmp_type); + + /* Save the result as a boolean and extend/convert it to the + wanted destination type. */ + tree expr = NULL_TREE; + + std::vector<tree> operands = build_operands (*inst_base); + + switch (inst->compare) + { + case BRIG_COMPARE_SEQ: + case BRIG_COMPARE_EQ: + expr = build2 (EQ_EXPR, result_type, operands[1], operands[2]); + break; + case BRIG_COMPARE_SNE: + case BRIG_COMPARE_NE: + expr = build2 (NE_EXPR, result_type, operands[1], operands[2]); + + if (!is_int_cmp) + expr = build2 (BIT_AND_EXPR, TREE_TYPE (expr), + expr, + build2 (ORDERED_EXPR, result_type, operands[1], + operands[2])); + break; + case BRIG_COMPARE_SLT: + case BRIG_COMPARE_LT: + expr = build2 (LT_EXPR, result_type, operands[1], operands[2]); + break; + case BRIG_COMPARE_SLE: + case BRIG_COMPARE_LE: + expr = build2 (LE_EXPR, result_type, operands[1], operands[2]); + break; + case BRIG_COMPARE_SGT: + case BRIG_COMPARE_GT: + expr = build2 (GT_EXPR, result_type, operands[1], operands[2]); + break; + case BRIG_COMPARE_SGE: + case BRIG_COMPARE_GE: + expr = build2 (GE_EXPR, result_type, operands[1], operands[2]); + break; + case BRIG_COMPARE_SEQU: + case BRIG_COMPARE_EQU: + expr = build2 (UNEQ_EXPR, result_type, operands[1], operands[2]); + break; + case BRIG_COMPARE_SNEU: + case BRIG_COMPARE_NEU: + expr = build2 (NE_EXPR, result_type, operands[1], operands[2]); + break; + case BRIG_COMPARE_SLTU: + case BRIG_COMPARE_LTU: + expr = build2 (UNLT_EXPR, result_type, operands[1], operands[2]); + break; + case BRIG_COMPARE_SLEU: + case BRIG_COMPARE_LEU: + expr = build2 (UNLE_EXPR, result_type, operands[1], operands[2]); + break; + case BRIG_COMPARE_SGTU: + case BRIG_COMPARE_GTU: + expr = build2 (UNGT_EXPR, result_type, operands[1], operands[2]); + break; + case BRIG_COMPARE_SGEU: + case BRIG_COMPARE_GEU: + expr = build2 (UNGE_EXPR, result_type, operands[1], operands[2]); + break; + case BRIG_COMPARE_SNUM: + case BRIG_COMPARE_NUM: + expr = build2 (ORDERED_EXPR, result_type, operands[1], operands[2]); + break; + case BRIG_COMPARE_SNAN: + case BRIG_COMPARE_NAN: + expr = build2 (UNORDERED_EXPR, result_type, operands[1], operands[2]); + break; + default: + break; + } + + if (expr == NULL_TREE) + gcc_unreachable (); + + if (is_fp16_dest) + { + expr = convert_to_real (brig_to_generic::s_fp32_type, expr); + } + else if (VECTOR_TYPE_P (dest_type) && ANY_INTEGRAL_TYPE_P (dest_type) + && !is_boolean_dest + && (inst->sourceType & BRIG_TYPE_BASE_MASK) != BRIG_TYPE_F16) + { + /* In later gcc versions, the output of comparison is not + all ones for vectors like still in 4.9.1. We need to use + an additional VEC_COND_EXPR to produce the all ones 'true' value + required by HSA. + VEC_COND_EXPR <a == b, { -1, -1, -1, -1 }, { 0, 0, 0, 0 }>; */ + + tree all_ones + = build_vector_from_val (dest_type, + build_minus_one_cst (TREE_TYPE (dest_type))); + tree all_zeroes + = build_vector_from_val (dest_type, + build_zero_cst (TREE_TYPE (dest_type))); + expr = build3 (VEC_COND_EXPR, dest_type, expr, all_ones, all_zeroes); + } + else if (INTEGRAL_TYPE_P (dest_type) && !is_boolean_dest) + { + /* We need to produce the all-ones pattern for the width of the whole + resulting integer type. Use back and forth shifts for propagating + the lower 1. */ + tree signed_type = signed_type_for (dest_type); + tree signed_result = convert_to_integer (signed_type, expr); + + size_t result_width = int_size_in_bytes (dest_type) * BITS_PER_UNIT; + + tree shift_amount_cst + = build_int_cstu (signed_type, result_width - 1); + + tree shift_left_result + = build2 (LSHIFT_EXPR, signed_type, signed_result, shift_amount_cst); + + expr = build2 (RSHIFT_EXPR, signed_type, shift_left_result, + shift_amount_cst); + } + else if (SCALAR_FLOAT_TYPE_P (dest_type)) + { + expr = convert_to_real (dest_type, expr); + } + else if (VECTOR_TYPE_P (dest_type) + && (inst->sourceType & BRIG_TYPE_BASE_MASK) == BRIG_TYPE_F16) + { + /* Because F16 comparison is emulated as an F32 comparison with S32 + results, we must now truncate the result vector to S16s so it + fits to the destination register. We can build the target vector + type from the f16 storage type (unsigned ints). */ + expr = add_temp_var ("wide_cmp_result", expr); + tree_stl_vec wide_elements; + tree_stl_vec shrunk_elements; + unpack (expr, wide_elements); + for (size_t i = 0; i < wide_elements.size (); ++i) + { + tree wide = wide_elements.at (i); + shrunk_elements.push_back + (convert_to_integer (short_integer_type_node, wide)); + } + expr = pack (shrunk_elements); + } + build_output_assignment (*inst_base, operands[0], expr); + + return base->byteCount; +} diff --git a/gcc/brig/brigfrontend/brig-code-entry-handler.cc b/gcc/brig/brigfrontend/brig-code-entry-handler.cc new file mode 100644 index 00000000000..5fe20124152 --- /dev/null +++ b/gcc/brig/brigfrontend/brig-code-entry-handler.cc @@ -0,0 +1,1716 @@ +/* brig-code-entry-handler.cc -- a gccbrig base class + Copyright (C) 2016 Free Software Foundation, Inc. + Contributed by Pekka Jaaskelainen <pekka.jaaskelainen@parmance.com> + for General Processor Tech. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation; either version 3, or (at your option) any later + version. + + GCC is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. */ + +#include "brig-code-entry-handler.h" + +#include "stringpool.h" +#include "tree-iterator.h" +#include "toplev.h" +#include "diagnostic.h" +#include "brig-machine.h" +#include "brig-util.h" +#include "errors.h" +#include "real.h" +#include "print-tree.h" +#include "tree-pretty-print.h" +#include "target.h" +#include "langhooks.h" +#include "gimple-expr.h" +#include "convert.h" +#include "brig-util.h" +#include "builtins.h" +#include "phsa.h" +#include "brig-builtins.h" +#include "fold-const.h" + +brig_code_entry_handler::builtin_map brig_code_entry_handler::s_custom_builtins; + +brig_code_entry_handler::brig_code_entry_handler (brig_to_generic &parent) + : brig_entry_handler (parent) +{ + if (s_custom_builtins.size () > 0) return; + + /* Populate the builtin index. */ +#undef DEF_HSAIL_ATOMIC_BUILTIN +#undef DEF_HSAIL_CVT_ZEROI_SAT_BUILTIN +#undef DEF_HSAIL_INTR_BUILTIN +#undef DEF_HSAIL_SAT_BUILTIN +#undef DEF_HSAIL_BUILTIN +#define DEF_HSAIL_BUILTIN(ENUM, HSAIL_OPCODE, HSAIL_TYPE, NAME, TYPE, ATTRS) \ + s_custom_builtins[std::make_pair (HSAIL_OPCODE, HSAIL_TYPE)] \ + = builtin_decl_explicit (ENUM); + +#include "brig-builtins.def" +} + +/* Build a tree operand which is a reference to a piece of code. REF is the + original reference as a BRIG object. */ + +tree +brig_code_entry_handler::build_code_ref (const BrigBase &ref) +{ + if (ref.kind == BRIG_KIND_DIRECTIVE_LABEL) + { + const BrigDirectiveLabel *brig_label = (const BrigDirectiveLabel *) &ref; + + const BrigData *label_name + = m_parent.get_brig_data_entry (brig_label->name); + + std::string label_str ((const char *) (label_name->bytes), + label_name->byteCount); + return m_parent.m_cf->label (label_str); + } + else if (ref.kind == BRIG_KIND_DIRECTIVE_FUNCTION) + { + const BrigDirectiveExecutable *func + = (const BrigDirectiveExecutable *) &ref; + return m_parent.function_decl (m_parent.get_mangled_name (func)); + } + else if (ref.kind == BRIG_KIND_DIRECTIVE_FBARRIER) + { + const BrigDirectiveFbarrier* fbar = (const BrigDirectiveFbarrier*)&ref; + + uint64_t offset = m_parent.group_variable_segment_offset + (m_parent.get_mangled_name (fbar)); + + return build_int_cst (uint32_type_node, offset); + } + else + gcc_unreachable (); +} + +/* Produce a tree operand for the given BRIG_INST and its OPERAND. + OPERAND_TYPE should be the operand type in case it should not + be dictated by the BrigBase. IS_INPUT indicates if the operand + is an input operand or a result. */ + +tree +brig_code_entry_handler::build_tree_operand (const BrigInstBase &brig_inst, + const BrigBase &operand, + tree operand_type, bool is_input) +{ + switch (operand.kind) + { + case BRIG_KIND_OPERAND_OPERAND_LIST: + { + vec<constructor_elt, va_gc> *constructor_vals = NULL; + const BrigOperandOperandList &oplist + = (const BrigOperandOperandList &) operand; + const BrigData *data = m_parent.get_brig_data_entry (oplist.elements); + size_t bytes = data->byteCount; + const BrigOperandOffset32_t *operand_ptr + = (const BrigOperandOffset32_t *) data->bytes; + while (bytes > 0) + { + BrigOperandOffset32_t offset = *operand_ptr; + const BrigBase *operand_element + = m_parent.get_brig_operand_entry (offset); + tree element + = build_tree_operand (brig_inst, *operand_element, operand_type); + + /* In case a vector is used an input, cast the elements to + correct size here so we don't need a separate unpack/pack for it. + fp16-fp32 conversion is done in build_operands (). */ + if (is_input && TREE_TYPE (element) != operand_type) + { + if (int_size_in_bytes (TREE_TYPE (element)) + == int_size_in_bytes (operand_type) + && !INTEGRAL_TYPE_P (operand_type)) + element = build1 (VIEW_CONVERT_EXPR, operand_type, element); + else + element = convert (operand_type, element); + } + + CONSTRUCTOR_APPEND_ELT (constructor_vals, NULL_TREE, element); + ++operand_ptr; + bytes -= 4; + } + size_t element_count = data->byteCount / 4; + tree vec_type = build_vector_type (operand_type, element_count); + + return build_constructor (vec_type, constructor_vals); + } + case BRIG_KIND_OPERAND_CODE_LIST: + { + /* Build a TREE_VEC of code expressions. */ + + const BrigOperandCodeList &oplist + = (const BrigOperandCodeList &) operand; + const BrigData *data = m_parent.get_brig_data_entry (oplist.elements); + size_t bytes = data->byteCount; + const BrigOperandOffset32_t *operand_ptr + = (const BrigOperandOffset32_t *) data->bytes; + + size_t case_index = 0; + size_t element_count = data->byteCount / 4; + + /* Create a TREE_VEC out of the labels in the list. */ + tree vec = make_tree_vec (element_count); + + while (bytes > 0) + { + BrigOperandOffset32_t offset = *operand_ptr; + const BrigBase *ref = m_parent.get_brig_code_entry (offset); + tree element = build_code_ref (*ref); + + gcc_assert (case_index < element_count); + TREE_VEC_ELT (vec, case_index) = element; + case_index++; + + ++operand_ptr; + bytes -= 4; + } + return vec; + } + case BRIG_KIND_OPERAND_REGISTER: + { + const BrigOperandRegister *brig_reg + = (const BrigOperandRegister *) &operand; + return m_parent.m_cf->get_m_var_declfor_reg (brig_reg); + } + case BRIG_KIND_OPERAND_CONSTANT_BYTES: + { + const BrigOperandConstantBytes *brigConst + = (const BrigOperandConstantBytes *) &operand; + /* The constants can be of different type than the instruction + and are implicitly casted to the input operand. */ + return get_tree_cst_for_hsa_operand (brigConst, NULL_TREE); + } + case BRIG_KIND_OPERAND_WAVESIZE: + { + if (!INTEGRAL_TYPE_P (operand_type)) + { + gcc_unreachable (); + return NULL_TREE; + } + return build_int_cstu (operand_type, gccbrig_get_target_wavesize ()); + } + case BRIG_KIND_OPERAND_CODE_REF: + { + const BrigOperandCodeRef *brig_code_ref + = (const BrigOperandCodeRef *) &operand; + + const BrigBase *ref = m_parent.get_brig_code_entry (brig_code_ref->ref); + + return build_code_ref (*ref); + } + case BRIG_KIND_OPERAND_ADDRESS: + { + return build_address_operand (brig_inst, + (const BrigOperandAddress &) operand); + } + default: + gcc_unreachable (); + } +} + +/* Build a tree node representing an address reference from a BRIG_INST and its + ADDR_OPERAND. */ + +tree +brig_code_entry_handler::build_address_operand + (const BrigInstBase &brig_inst, const BrigOperandAddress &addr_operand) +{ + tree instr_type = gccbrig_tree_type_for_hsa_type (brig_inst.type); + + BrigSegment8_t segment = BRIG_SEGMENT_GLOBAL; + if (brig_inst.opcode == BRIG_OPCODE_LDA) + segment = ((const BrigInstAddr &) brig_inst).segment; + else if (brig_inst.base.kind == BRIG_KIND_INST_MEM) + segment = ((const BrigInstMem &) brig_inst).segment; + else if (brig_inst.base.kind == BRIG_KIND_INST_ATOMIC) + segment = ((const BrigInstAtomic &) brig_inst).segment; + + tree var_offset = NULL_TREE; + tree const_offset = NULL_TREE; + tree symbol_base = NULL_TREE; + + if (addr_operand.symbol != 0) + { + const BrigDirectiveVariable *arg_symbol + = (const BrigDirectiveVariable *) m_parent.get_brig_code_entry + (addr_operand.symbol); + + std::string var_name = m_parent.get_mangled_name (arg_symbol); + + if (segment == BRIG_SEGMENT_KERNARG) + { + /* Find the offset to the kernarg buffer for the given + kernel argument variable. */ + tree func = m_parent.m_cf->m_func_decl; + /* __args is the first parameter in kernel functions. */ + symbol_base = DECL_ARGUMENTS (func); + uint64_t offset = m_parent.m_cf->kernel_arg_offset (arg_symbol); + if (offset > 0) + const_offset = build_int_cst (size_type_node, offset); + } + else if (segment == BRIG_SEGMENT_GROUP) + { + + uint64_t offset = m_parent.group_variable_segment_offset (var_name); + const_offset = build_int_cst (size_type_node, offset); + } + else if (segment == BRIG_SEGMENT_PRIVATE || segment == BRIG_SEGMENT_SPILL) + { + uint32_t offset = m_parent.private_variable_segment_offset (var_name); + + /* Compute the offset to the work item's copy: + + single-wi-offset * local_size + wiflatid * varsize + + This way the work items have the same variable in + successive elements to each other in the segment, + helping to achieve autovectorization of loads/stores + with stride 1. */ + + tree_stl_vec uint32_0 + = tree_stl_vec (1, build_int_cst (uint32_type_node, 0)); + + tree_stl_vec uint32_1 + = tree_stl_vec (1, build_int_cst (uint32_type_node, 1)); + + tree_stl_vec uint32_2 + = tree_stl_vec (1, build_int_cst (uint32_type_node, 2)); + + tree local_size + = build2 (MULT_EXPR, uint32_type_node, + expand_or_call_builtin (BRIG_OPCODE_WORKGROUPSIZE, + BRIG_TYPE_U32, + uint32_type_node, uint32_0), + expand_or_call_builtin (BRIG_OPCODE_WORKGROUPSIZE, + BRIG_TYPE_U32, + uint32_type_node, uint32_1)); + + local_size + = build2 (MULT_EXPR, uint32_type_node, + expand_or_call_builtin (BRIG_OPCODE_WORKGROUPSIZE, + BRIG_TYPE_U32, + uint32_type_node, uint32_2), + local_size); + + tree var_region + = build2 (MULT_EXPR, uint32_type_node, + build_int_cst (uint32_type_node, offset), local_size); + + tree_stl_vec operands; + tree pos + = build2 (MULT_EXPR, uint32_type_node, + build_int_cst (uint32_type_node, + m_parent.private_variable_size (var_name)), + expand_or_call_builtin (BRIG_OPCODE_WORKITEMFLATID, + BRIG_TYPE_U32, + uint32_type_node, operands)); + + tree var_offset + = build2 (PLUS_EXPR, uint32_type_node, var_region, pos); + + /* In case of LDA this is returned directly as an integer value. + For other mem-related instructions, we will convert this segment + offset to a flat address by adding it as an offset to a (private + or group) base pointer later on. Same applies to group_var_offset. */ + symbol_base + = add_temp_var ("priv_var_offset", + convert (size_type_node, var_offset)); + } + else if (segment == BRIG_SEGMENT_ARG) + { + tree arg_var_decl; + if (m_parent.m_cf->m_ret_value_brig_var == arg_symbol) + arg_var_decl = m_parent.m_cf->m_ret_temp; + else + arg_var_decl = m_parent.m_cf->arg_variable (arg_symbol); + + gcc_assert (arg_var_decl != NULL_TREE); + + tree ptype = build_pointer_type (instr_type); + + if (arg_symbol->type & BRIG_TYPE_ARRAY) + { + + /* Two different type of array references in case of arguments + depending where they are referred at. In the caller (argument + segment), the reference is to an array object and + in the callee, the array object has been passed as a pointer + to the array object. */ + + if (POINTER_TYPE_P (TREE_TYPE (arg_var_decl))) + symbol_base = build_reinterpret_cast (ptype, arg_var_decl); + else + { + /* In case we are referring to an array (the argument in + call site), use its element zero as the base address. */ + tree element_zero + = build4 (ARRAY_REF, TREE_TYPE (TREE_TYPE (arg_var_decl)), + arg_var_decl, integer_zero_node, NULL_TREE, + NULL_TREE); + symbol_base = build1 (ADDR_EXPR, ptype, element_zero); + } + } + else + symbol_base = build1 (ADDR_EXPR, ptype, arg_var_decl); + } + else + { + tree global_var_decl = m_parent.global_variable (var_name); + + /* In case the global variable hasn't been defined (yet), + use the host def indirection ptr variable. */ + if (global_var_decl == NULL_TREE) + { + std::string host_ptr_name + = std::string (PHSA_HOST_DEF_PTR_PREFIX) + var_name; + tree host_defined_ptr = m_parent.global_variable (host_ptr_name); + gcc_assert (host_defined_ptr != NULL_TREE); + symbol_base = host_defined_ptr; + } + else + { + gcc_assert (global_var_decl != NULL_TREE); + + tree ptype = build_pointer_type (instr_type); + symbol_base = build1 (ADDR_EXPR, ptype, global_var_decl); + } + } + } + + if (brig_inst.opcode != BRIG_OPCODE_LDA) + { + /* In case of lda_* we want to return the segment address because it's + used as a value, perhaps in address computation and later converted + explicitly to a flat address. + + In case of other instructions with memory operands we produce the flat + address directly here (assuming the target does not have a separate + address space for group/private segments for now). */ + if (segment == BRIG_SEGMENT_GROUP) + symbol_base = m_parent.m_cf->m_group_base_arg; + else if (segment == BRIG_SEGMENT_PRIVATE + || segment == BRIG_SEGMENT_SPILL) + { + if (symbol_base != NULL_TREE) + symbol_base = build2 (POINTER_PLUS_EXPR, ptr_type_node, + m_parent.m_cf->m_private_base_arg, + symbol_base); + else + symbol_base = m_parent.m_cf->m_private_base_arg; + } + } + + if (addr_operand.reg != 0) + { + const BrigOperandRegister *mem_base_reg + = (const BrigOperandRegister *) m_parent.get_brig_operand_entry + (addr_operand.reg); + tree base_reg_var = m_parent.m_cf->get_m_var_declfor_reg (mem_base_reg); + var_offset = convert_to_pointer (ptr_type_node, base_reg_var); + + gcc_assert (var_offset != NULL_TREE); + } + /* The pointer type we use to access the memory. Should be of the + width of the load/store instruction, not the target/data + register. */ + tree ptype = build_pointer_type (instr_type); + + gcc_assert (ptype != NULL_TREE); + + tree addr = NULL_TREE; + if (symbol_base != NULL_TREE && var_offset != NULL_TREE) + /* The most complex addressing mode: symbol + reg [+ const offset]. */ + addr = build2 (POINTER_PLUS_EXPR, ptr_type_node, + convert (ptr_type_node, symbol_base), + convert (size_type_node, var_offset)); + else if (var_offset != NULL) + addr = var_offset; + else if (symbol_base != NULL) + addr = symbol_base; + + if (const_offset != NULL_TREE) + { + if (addr == NULL_TREE) + /* At least direct module-scope global group symbol access with LDA + has only the const_offset. Group base ptr is not added as LDA should + return the segment address, not the flattened one. */ + addr = const_offset; + else + addr = build2 (POINTER_PLUS_EXPR, ptr_type_node, + addr, convert (size_type_node, const_offset)); + } + + /* We might have two const offsets in case of group or private arrays + which have the first offset to the incoming group/private pointer + arg, and the second one an offset to it. */ + uint64_t offs = gccbrig_to_uint64_t (addr_operand.offset); + if (offs > 0) + { + tree const_offset_2 = build_int_cst (size_type_node, offs); + if (addr == NULL_TREE) + addr = const_offset_2; + else + addr = build2 (POINTER_PLUS_EXPR, ptr_type_node, + addr, convert (size_type_node, const_offset_2)); + + } + + gcc_assert (addr != NULL_TREE); + return convert_to_pointer (ptype, addr); +} + +/* Builds a tree operand with the given OPERAND_INDEX for the given + BRIG_INST with the desired tree OPERAND_TYPE. OPERAND_TYPE can + be NULL in case the type is forced by the BRIG_INST type. */ + +tree +brig_code_entry_handler::build_tree_operand_from_brig + (const BrigInstBase *brig_inst, tree operand_type, size_t operand_index) +{ + const BrigData *operand_entries + = m_parent.get_brig_data_entry (brig_inst->operands); + + uint32_t operand_offset + = ((const uint32_t *) &operand_entries->bytes)[operand_index]; + const BrigBase *operand_data + = m_parent.get_brig_operand_entry (operand_offset); + return build_tree_operand (*brig_inst, *operand_data, operand_type); +} + +/* Builds a single (scalar) constant initialized element of type + ELEMENT_TYPE from the buffer pointed to by NEXT_DATA. */ + +tree +brig_code_entry_handler::build_tree_cst_element + (BrigType16_t element_type, const unsigned char *next_data) const +{ + + tree tree_element_type = gccbrig_tree_type_for_hsa_type (element_type); + + tree cst; + switch (element_type) + { + case BRIG_TYPE_F16: + { + HOST_WIDE_INT low = *(const uint16_t *) next_data; + cst = build_int_cst (uint16_type_node, low); + break; + } + case BRIG_TYPE_F32: + { + REAL_VALUE_TYPE val; + ieee_single_format.decode (&ieee_single_format, &val, + (const long *) next_data); + cst = build_real (tree_element_type, val); + break; + } + case BRIG_TYPE_F64: + { + long data[2]; + data[0] = *(const uint32_t *) next_data; + data[1] = *(const uint32_t *) (next_data + 4); + REAL_VALUE_TYPE val; + ieee_double_format.decode (&ieee_double_format, &val, data); + cst = build_real (tree_element_type, val); + break; + } + case BRIG_TYPE_S8: + case BRIG_TYPE_S16: + case BRIG_TYPE_S32: + case BRIG_TYPE_S64: + { + HOST_WIDE_INT low = *(const int64_t *) next_data; + cst = build_int_cst (tree_element_type, low); + break; + } + case BRIG_TYPE_U8: + case BRIG_TYPE_U16: + case BRIG_TYPE_U32: + case BRIG_TYPE_U64: + { + unsigned HOST_WIDE_INT low = *(const uint64_t *) next_data; + cst = build_int_cstu (tree_element_type, low); + break; + } + case BRIG_TYPE_SIG64: + { + unsigned HOST_WIDE_INT low = *(const uint64_t *) next_data; + cst = build_int_cstu (uint64_type_node, low); + break; + } + case BRIG_TYPE_SIG32: + { + unsigned HOST_WIDE_INT low = *(const uint64_t *) next_data; + cst = build_int_cstu (uint32_type_node, low); + break; + } + default: + gcc_unreachable (); + return NULL_TREE; + } + return cst; +} + +/* Produce a tree constant type for the given BRIG constant (BRIG_CONST). + TYPE should be the forced instruction type, otherwise the type is + dictated by the BRIG_CONST. */ + +tree +brig_code_entry_handler::get_tree_cst_for_hsa_operand + (const BrigOperandConstantBytes *brig_const, tree type) const +{ + const BrigData *data = m_parent.get_brig_data_entry (brig_const->bytes); + + tree cst = NULL_TREE; + + if (type == NULL_TREE) + type = gccbrig_tree_type_for_hsa_type (brig_const->type); + + /* The type of a single (scalar) element inside an array, + vector or an array of vectors. */ + BrigType16_t scalar_element_type + = brig_const->type & BRIG_TYPE_BASE_MASK; + tree tree_element_type = type; + + vec<constructor_elt, va_gc> *constructor_vals = NULL; + + if (TREE_CODE (type) == ARRAY_TYPE) + tree_element_type = TREE_TYPE (type); + + size_t bytes_left = data->byteCount; + const unsigned char *next_data = data->bytes; + size_t scalar_element_size + = gccbrig_hsa_type_bit_size (scalar_element_type) / BITS_PER_UNIT; + + while (bytes_left > 0) + { + if (VECTOR_TYPE_P (tree_element_type)) + { + /* In case of vector type elements (or sole vectors), + create a vector ctor. */ + size_t element_count = TYPE_VECTOR_SUBPARTS (tree_element_type); + if (bytes_left < scalar_element_size * element_count) + fatal_error (UNKNOWN_LOCATION, + "Not enough bytes left for the initializer " + "(%lu need %lu).", + bytes_left, scalar_element_size * element_count); + + vec<constructor_elt, va_gc> *vec_els = NULL; + for (size_t i = 0; i < element_count; ++i) + { + tree element + = build_tree_cst_element (scalar_element_type, next_data); + CONSTRUCTOR_APPEND_ELT (vec_els, NULL_TREE, element); + bytes_left -= scalar_element_size; + next_data += scalar_element_size; + } + cst = build_vector_from_ctor (tree_element_type, vec_els); + } + else + { + if (bytes_left < scalar_element_size) + fatal_error (UNKNOWN_LOCATION, + "Not enough bytes left for the initializer " + "(%lu need %lu).", + bytes_left, scalar_element_size); + cst = build_tree_cst_element (scalar_element_type, next_data); + bytes_left -= scalar_element_size; + next_data += scalar_element_size; + } + CONSTRUCTOR_APPEND_ELT (constructor_vals, NULL_TREE, cst); + } + + if (TREE_CODE (type) == ARRAY_TYPE) + return build_constructor (type, constructor_vals); + else + return cst; +} + +/* Return the matching tree instruction arithmetics type for the + given BRIG_TYPE. The aritmethics type is the one with which + computation is done (in contrast to the storage type). F16 + arithmetics type is emulated using F32 for now. */ + +tree +brig_code_entry_handler::get_tree_expr_type_for_hsa_type + (BrigType16_t brig_type) const +{ + BrigType16_t brig_inner_type = brig_type & BRIG_TYPE_BASE_MASK; + if (brig_inner_type == BRIG_TYPE_F16) + { + if (brig_inner_type == brig_type) + return m_parent.s_fp32_type; + size_t element_count = gccbrig_hsa_type_bit_size (brig_type) / 16; + return build_vector_type (m_parent.s_fp32_type, element_count); + } + else + return gccbrig_tree_type_for_hsa_type (brig_type); +} + +/* In case the HSA instruction must be implemented using a builtin, + this function is called to get the correct builtin function. + TYPE is the instruction tree type, BRIG_OPCODE the opcode of the + brig instruction and BRIG_TYPE the brig instruction's type. */ + +tree +brig_code_entry_handler::get_builtin_for_hsa_opcode + (tree type, BrigOpcode16_t brig_opcode, BrigType16_t brig_type) const +{ + tree builtin = NULL_TREE; + tree builtin_type = type; + + /* For vector types, first find the scalar version of the builtin. */ + if (type != NULL_TREE && VECTOR_TYPE_P (type)) + builtin_type = TREE_TYPE (type); + BrigType16_t brig_inner_type = brig_type & BRIG_TYPE_BASE_MASK; + + /* Some BRIG opcodes can use the same builtins for unsigned and + signed types. Force these cases to unsigned types. */ + + if (brig_opcode == BRIG_OPCODE_BORROW + || brig_opcode == BRIG_OPCODE_CARRY + || brig_opcode == BRIG_OPCODE_LASTBIT + || brig_opcode == BRIG_OPCODE_BITINSERT) + { + if (brig_type == BRIG_TYPE_S32) + brig_type = BRIG_TYPE_U32; + else if (brig_type == BRIG_TYPE_S64) + brig_type = BRIG_TYPE_U64; + } + + switch (brig_opcode) + { + case BRIG_OPCODE_FLOOR: + builtin = mathfn_built_in (builtin_type, BUILT_IN_FLOOR); + break; + case BRIG_OPCODE_CEIL: + builtin = mathfn_built_in (builtin_type, BUILT_IN_CEIL); + break; + case BRIG_OPCODE_SQRT: + case BRIG_OPCODE_NSQRT: + builtin = mathfn_built_in (builtin_type, BUILT_IN_SQRT); + break; + case BRIG_OPCODE_RINT: + builtin = mathfn_built_in (builtin_type, BUILT_IN_RINT); + break; + case BRIG_OPCODE_TRUNC: + builtin = mathfn_built_in (builtin_type, BUILT_IN_TRUNC); + break; + case BRIG_OPCODE_COPYSIGN: + builtin = mathfn_built_in (builtin_type, BUILT_IN_COPYSIGN); + break; + case BRIG_OPCODE_NSIN: + builtin = mathfn_built_in (builtin_type, BUILT_IN_SIN); + break; + case BRIG_OPCODE_NLOG2: + builtin = mathfn_built_in (builtin_type, BUILT_IN_LOG2); + break; + case BRIG_OPCODE_NEXP2: + builtin = mathfn_built_in (builtin_type, BUILT_IN_EXP2); + break; + case BRIG_OPCODE_NFMA: + builtin = mathfn_built_in (builtin_type, BUILT_IN_FMA); + break; + case BRIG_OPCODE_NCOS: + builtin = mathfn_built_in (builtin_type, BUILT_IN_COS); + break; + case BRIG_OPCODE_POPCOUNT: + /* Popcount should be typed by its argument type (the return value + is always u32). Let's use a b64 version for also for b32 for now. */ + return builtin_decl_explicit (BUILT_IN_POPCOUNTL); + case BRIG_OPCODE_BORROW: + /* Borrow uses the same builtin for unsigned and signed types. */ + if (brig_type == BRIG_TYPE_S32 || brig_type == BRIG_TYPE_U32) + return builtin_decl_explicit (BUILT_IN_HSAIL_BORROW_U32); + else + return builtin_decl_explicit (BUILT_IN_HSAIL_BORROW_U64); + case BRIG_OPCODE_CARRY: + /* Carry also uses the same builtin for unsigned and signed types. */ + if (brig_type == BRIG_TYPE_S32 || brig_type == BRIG_TYPE_U32) + return builtin_decl_explicit (BUILT_IN_HSAIL_CARRY_U32); + else + return builtin_decl_explicit (BUILT_IN_HSAIL_CARRY_U64); + default: + + /* Use our builtin index for finding a proper builtin for the BRIG + opcode and BRIG type. This takes care most of the builtin cases, + the special cases are handled in the separate 'case' statements + above. */ + builtin_map::const_iterator i + = s_custom_builtins.find (std::make_pair (brig_opcode, brig_type)); + if (i != s_custom_builtins.end ()) + return (*i).second; + + if (brig_inner_type != brig_type) + { + /* Try to find a scalar built-in we could use. */ + i = s_custom_builtins.find + (std::make_pair (brig_opcode, brig_inner_type)); + if (i != s_custom_builtins.end ()) + return (*i).second; + } + + /* In case this is an fp16 operation that is promoted to fp32, + try to find a fp32 scalar built-in. */ + if (brig_inner_type == BRIG_TYPE_F16) + { + i = s_custom_builtins.find + (std::make_pair (brig_opcode, BRIG_TYPE_F32)); + if (i != s_custom_builtins.end ()) + return (*i).second; + } + gcc_unreachable (); + } + + if (VECTOR_TYPE_P (type) && builtin != NULL_TREE) + { + /* Try to find a vectorized version of the built-in. + TODO: properly assert that builtin is a mathfn builtin? */ + tree vec_builtin + = targetm.vectorize.builtin_vectorized_function + (builtin_mathfn_code (builtin), type, type); + if (vec_builtin != NULL_TREE) + return vec_builtin; + else + return builtin; + } + if (builtin == NULL_TREE) + gcc_unreachable (); + return builtin; +} + +/* Return the correct GENERIC type for storing comparison results + of operand with the type given in SOURCE_TYPE. */ + +tree +brig_code_entry_handler::get_comparison_result_type (tree source_type) +{ + if (VECTOR_TYPE_P (source_type)) + { + size_t element_size = int_size_in_bytes (TREE_TYPE (source_type)); + return build_vector_type + (build_nonstandard_boolean_type (element_size * BITS_PER_UNIT), + TYPE_VECTOR_SUBPARTS (source_type)); + } + else + return gccbrig_tree_type_for_hsa_type (BRIG_TYPE_B1); +} + +/* Returns true in case the given opcode needs to know about work-item context + data. In such case the context data is passed as a pointer to a work-item + context object, as the last argument in the builtin call. */ + +bool +brig_code_entry_handler::needs_workitem_context_data + (BrigOpcode16_t brig_opcode) const +{ + switch (brig_opcode) + { + case BRIG_OPCODE_WORKITEMABSID: + case BRIG_OPCODE_WORKITEMFLATABSID: + case BRIG_OPCODE_WORKITEMFLATID: + case BRIG_OPCODE_CURRENTWORKITEMFLATID: + case BRIG_OPCODE_WORKITEMID: + case BRIG_OPCODE_WORKGROUPID: + case BRIG_OPCODE_WORKGROUPSIZE: + case BRIG_OPCODE_CURRENTWORKGROUPSIZE: + case BRIG_OPCODE_GRIDGROUPS: + case BRIG_OPCODE_GRIDSIZE: + case BRIG_OPCODE_DIM: + case BRIG_OPCODE_PACKETID: + case BRIG_OPCODE_PACKETCOMPLETIONSIG: + case BRIG_OPCODE_BARRIER: + case BRIG_OPCODE_WAVEBARRIER: + case BRIG_OPCODE_ARRIVEFBAR: + case BRIG_OPCODE_INITFBAR: + case BRIG_OPCODE_JOINFBAR: + case BRIG_OPCODE_LEAVEFBAR: + case BRIG_OPCODE_RELEASEFBAR: + case BRIG_OPCODE_WAITFBAR: + case BRIG_OPCODE_CUID: + case BRIG_OPCODE_MAXCUID: + case BRIG_OPCODE_DEBUGTRAP: + case BRIG_OPCODE_GROUPBASEPTR: + case BRIG_OPCODE_KERNARGBASEPTR: + case BRIG_OPCODE_ALLOCA: + return true; + default: + return false; + }; +} + +/* Returns true in case the given opcode that would normally be generated + as a builtin call can be expanded to tree nodes. */ + +bool +brig_code_entry_handler::can_expand_builtin (BrigOpcode16_t brig_opcode) const +{ + switch (brig_opcode) + { + case BRIG_OPCODE_WORKITEMFLATABSID: + case BRIG_OPCODE_WORKITEMFLATID: + case BRIG_OPCODE_WORKITEMABSID: + case BRIG_OPCODE_WORKGROUPSIZE: + case BRIG_OPCODE_CURRENTWORKGROUPSIZE: + /* TODO: expand more builtins. */ + return true; + default: + return false; + }; +} + +/* Try to expand the given builtin call to reuse a previously generated + variable, if possible. If not, just call the given builtin. + BRIG_OPCODE and BRIG_TYPE identify the builtin's BRIG opcode/type, + ARITH_TYPE its GENERIC type, and OPERANDS contains the builtin's + input operands. */ + +tree +brig_code_entry_handler::expand_or_call_builtin (BrigOpcode16_t brig_opcode, + BrigType16_t brig_type, + tree arith_type, + tree_stl_vec &operands) +{ + if (m_parent.m_cf->m_is_kernel && can_expand_builtin (brig_opcode)) + return expand_builtin (brig_opcode, operands); + + tree built_in + = get_builtin_for_hsa_opcode (arith_type, brig_opcode, brig_type); + + if (!VECTOR_TYPE_P (TREE_TYPE (TREE_TYPE (built_in))) + && arith_type != NULL_TREE && VECTOR_TYPE_P (arith_type) + && brig_opcode != BRIG_OPCODE_LERP + && brig_opcode != BRIG_OPCODE_PACKCVT + && brig_opcode != BRIG_OPCODE_SAD + && brig_opcode != BRIG_OPCODE_SADHI) + { + /* Call the scalar built-in for all elements in the vector. */ + tree_stl_vec operand0_elements; + if (operands.size () > 0) + unpack (operands[0], operand0_elements); + + tree_stl_vec operand1_elements; + if (operands.size () > 1) + unpack (operands[1], operand1_elements); + + tree_stl_vec result_elements; + + for (size_t i = 0; i < TYPE_VECTOR_SUBPARTS (arith_type); ++i) + { + tree_stl_vec call_operands; + if (operand0_elements.size () > 0) + call_operands.push_back (operand0_elements.at (i)); + + if (operand1_elements.size () > 0) + call_operands.push_back (operand1_elements.at (i)); + + result_elements.push_back + (expand_or_call_builtin (brig_opcode, brig_type, + TREE_TYPE (arith_type), + call_operands)); + } + return pack (result_elements); + } + + tree_stl_vec call_operands; + tree_stl_vec operand_types; + + tree arg_type_chain = TYPE_ARG_TYPES (TREE_TYPE (built_in)); + + for (size_t i = 0; i < operands.size (); ++i) + { + tree operand_type = TREE_VALUE (arg_type_chain); + call_operands.push_back (convert (operand_type, operands[i])); + operand_types.push_back (operand_type); + arg_type_chain = TREE_CHAIN (arg_type_chain); + } + + if (needs_workitem_context_data (brig_opcode)) + { + call_operands.push_back (m_parent.m_cf->m_context_arg); + operand_types.push_back (ptr_type_node); + m_parent.m_cf->m_has_unexpanded_dp_builtins = true; + } + + size_t operand_count = call_operands.size (); + + call_operands.resize (4, NULL_TREE); + operand_types.resize (4, NULL_TREE); + for (size_t i = 0; i < operand_count; ++i) + call_operands.at (i) = build_reinterpret_cast (operand_types.at (i), + call_operands.at (i)); + + tree fnptr = build_fold_addr_expr (built_in); + return build_call_array (TREE_TYPE (TREE_TYPE (built_in)), fnptr, + operand_count, &call_operands[0]); +} + +/* Instead of calling a built-in, reuse a previously returned value known to + be still valid. This is beneficial especially for the work-item + identification related builtins as not having them as calls can lead to + more easily vectorizable parallel loops for multi work-item work-groups. + BRIG_OPCODE identifies the builtin and OPERANDS store the operands. */ + +tree +brig_code_entry_handler::expand_builtin (BrigOpcode16_t brig_opcode, + tree_stl_vec &operands) +{ + tree_stl_vec uint32_0 = tree_stl_vec (1, build_int_cst (uint32_type_node, 0)); + + tree_stl_vec uint32_1 = tree_stl_vec (1, build_int_cst (uint32_type_node, 1)); + + tree_stl_vec uint32_2 = tree_stl_vec (1, build_int_cst (uint32_type_node, 2)); + + if (brig_opcode == BRIG_OPCODE_WORKITEMFLATABSID) + { + tree id0 = expand_builtin (BRIG_OPCODE_WORKITEMABSID, uint32_0); + id0 = convert (uint64_type_node, id0); + + tree id1 = expand_builtin (BRIG_OPCODE_WORKITEMABSID, uint32_1); + id1 = convert (uint64_type_node, id1); + + tree id2 = expand_builtin (BRIG_OPCODE_WORKITEMABSID, uint32_2); + id2 = convert (uint64_type_node, id2); + + tree max0 = convert (uint64_type_node, + m_parent.m_cf->m_grid_size_vars[0]); + tree max1 = convert (uint64_type_node, + m_parent.m_cf->m_grid_size_vars[1]); + + tree id2_x_max0_x_max1 = build2 (MULT_EXPR, uint64_type_node, id2, max0); + id2_x_max0_x_max1 + = build2 (MULT_EXPR, uint64_type_node, id2_x_max0_x_max1, max1); + + tree id1_x_max0 = build2 (MULT_EXPR, uint64_type_node, id1, max0); + + tree sum = build2 (PLUS_EXPR, uint64_type_node, id0, id1_x_max0); + sum = build2 (PLUS_EXPR, uint64_type_node, sum, id2_x_max0_x_max1); + + return add_temp_var ("workitemflatabsid", sum); + } + else if (brig_opcode == BRIG_OPCODE_WORKITEMABSID) + { + HOST_WIDE_INT dim = int_constant_value (operands[0]); + + tree local_id_var = m_parent.m_cf->m_local_id_vars[dim]; + tree wg_id_var = m_parent.m_cf->m_wg_id_vars[dim]; + tree wg_size_var = m_parent.m_cf->m_wg_size_vars[dim]; + tree grid_size_var = m_parent.m_cf->m_grid_size_vars[dim]; + + tree wg_id_x_wg_size = build2 (MULT_EXPR, uint32_type_node, + convert (uint32_type_node, wg_id_var), + convert (uint32_type_node, wg_size_var)); + tree sum + = build2 (PLUS_EXPR, uint32_type_node, wg_id_x_wg_size, local_id_var); + + /* We need a modulo here because of work-groups which have dimensions + larger than the grid size :( TO CHECK: is this really allowed in the + specs? */ + tree modulo + = build2 (TRUNC_MOD_EXPR, uint32_type_node, sum, grid_size_var); + + return add_temp_var (std::string ("workitemabsid_") + + (char) ((int) 'x' + dim), + modulo); + } + else if (brig_opcode == BRIG_OPCODE_WORKITEMFLATID) + { + tree z_x_wgsx_wgsy + = build2 (MULT_EXPR, uint32_type_node, + m_parent.m_cf->m_local_id_vars[2], + m_parent.m_cf->m_wg_size_vars[0]); + z_x_wgsx_wgsy = build2 (MULT_EXPR, uint32_type_node, z_x_wgsx_wgsy, + m_parent.m_cf->m_wg_size_vars[1]); + + tree y_x_wgsx + = build2 (MULT_EXPR, uint32_type_node, + m_parent.m_cf->m_local_id_vars[1], + m_parent.m_cf->m_wg_size_vars[0]); + + tree sum = build2 (PLUS_EXPR, uint32_type_node, y_x_wgsx, z_x_wgsx_wgsy); + sum = build2 (PLUS_EXPR, uint32_type_node, + m_parent.m_cf->m_local_id_vars[0], + sum); + return add_temp_var ("workitemflatid", sum); + } + else if (brig_opcode == BRIG_OPCODE_WORKGROUPSIZE) + { + HOST_WIDE_INT dim = int_constant_value (operands[0]); + return m_parent.m_cf->m_wg_size_vars[dim]; + } + else if (brig_opcode == BRIG_OPCODE_CURRENTWORKGROUPSIZE) + { + HOST_WIDE_INT dim = int_constant_value (operands[0]); + return m_parent.m_cf->m_cur_wg_size_vars[dim]; + } + else + gcc_unreachable (); + + return NULL_TREE; +} + +/* Appends and returns a new temp variable and an accompanying assignment + statement that stores the value of the given EXPR and has the given NAME. */ + +tree +brig_code_entry_handler::add_temp_var (std::string name, tree expr) +{ + tree temp_var = create_tmp_var (TREE_TYPE (expr), name.c_str ()); + tree assign = build2 (MODIFY_EXPR, TREE_TYPE (temp_var), temp_var, expr); + m_parent.m_cf->append_statement (assign); + return temp_var; +} + +/* Creates a FP32 to FP16 conversion call, assuming the source and destination + are FP32 type variables. */ + +tree +brig_code_entry_handler::build_f2h_conversion (tree source) +{ + return float_to_half () (*this, source); +} + +/* Creates a FP16 to FP32 conversion call, assuming the source and destination + are FP32 type variables. */ + +tree +brig_code_entry_handler::build_h2f_conversion (tree source) +{ + return half_to_float () (*this, source); +} + +/* Builds and "normalizes" the dest and source operands for the instruction + execution; converts the input operands to the expected instruction type, + performs half to float conversions, constant to correct type variable, + and flush to zero (if applicable). */ + +tree_stl_vec +brig_code_entry_handler::build_operands (const BrigInstBase &brig_inst) +{ + /* Flush to zero. */ + bool ftz = false; + const BrigBase *base = &brig_inst.base; + + if (base->kind == BRIG_KIND_INST_MOD) + { + const BrigInstMod *mod = (const BrigInstMod *) base; + ftz = mod->modifier & BRIG_ALU_FTZ; + } + else if (base->kind == BRIG_KIND_INST_CMP) + { + const BrigInstCmp *cmp = (const BrigInstCmp *) base; + ftz = cmp->modifier & BRIG_ALU_FTZ; + } + + bool is_vec_instr = hsa_type_packed_p (brig_inst.type); + + size_t element_count; + if (is_vec_instr) + { + BrigType16_t brig_element_type = brig_inst.type & BRIG_TYPE_BASE_MASK; + element_count = gccbrig_hsa_type_bit_size (brig_inst.type) + / gccbrig_hsa_type_bit_size (brig_element_type); + } + else + element_count = 1; + + bool is_fp16_arith = false; + + tree src_type; + tree dest_type; + if (base->kind == BRIG_KIND_INST_CMP) + { + const BrigInstCmp *cmp_inst = (const BrigInstCmp *) base; + src_type = gccbrig_tree_type_for_hsa_type (cmp_inst->sourceType); + dest_type = gccbrig_tree_type_for_hsa_type (brig_inst.type); + is_fp16_arith + = (cmp_inst->sourceType & BRIG_TYPE_BASE_MASK) == BRIG_TYPE_F16; + } + else if (base->kind == BRIG_KIND_INST_SOURCE_TYPE) + { + const BrigInstSourceType *src_type_inst + = (const BrigInstSourceType *) base; + src_type = gccbrig_tree_type_for_hsa_type (src_type_inst->sourceType); + dest_type = gccbrig_tree_type_for_hsa_type (brig_inst.type); + is_fp16_arith + = (src_type_inst->sourceType & BRIG_TYPE_BASE_MASK) == BRIG_TYPE_F16 + && !gccbrig_is_bit_operation (brig_inst.opcode); + } + else if (base->kind == BRIG_KIND_INST_SEG_CVT) + { + const BrigInstSegCvt *seg_cvt_inst = (const BrigInstSegCvt *) base; + src_type = gccbrig_tree_type_for_hsa_type (seg_cvt_inst->sourceType); + dest_type = gccbrig_tree_type_for_hsa_type (brig_inst.type); + } + else if (base->kind == BRIG_KIND_INST_MEM) + { + src_type = gccbrig_tree_type_for_hsa_type (brig_inst.type); + dest_type = src_type; + /* With mem instructions we don't want to cast the fp16 + back and forth between fp32, because the load/stores + are not specific to the data type. */ + is_fp16_arith = false; + } + else if (base->kind == BRIG_KIND_INST_CVT) + { + const BrigInstCvt *cvt_inst = (const BrigInstCvt *) base; + + src_type = gccbrig_tree_type_for_hsa_type (cvt_inst->sourceType); + dest_type = gccbrig_tree_type_for_hsa_type (brig_inst.type); + } + else + { + switch (brig_inst.opcode) + { + case BRIG_OPCODE_INITFBAR: + case BRIG_OPCODE_JOINFBAR: + case BRIG_OPCODE_WAITFBAR: + case BRIG_OPCODE_ARRIVEFBAR: + case BRIG_OPCODE_LEAVEFBAR: + case BRIG_OPCODE_RELEASEFBAR: + src_type = uint32_type_node; + break; + default: + src_type = gccbrig_tree_type_for_hsa_type (brig_inst.type); + break; + } + dest_type = src_type; + is_fp16_arith + = !gccbrig_is_bit_operation (brig_inst.opcode) + && (brig_inst.type & BRIG_TYPE_BASE_MASK) == BRIG_TYPE_F16; + } + + /* Halfs are a tricky special case: their "storage format" is u16, but + scalars are stored in 32b regs while packed f16 are... well packed. */ + tree half_storage_type = element_count > 1 + ? gccbrig_tree_type_for_hsa_type (brig_inst.type) + : uint32_type_node; + + const BrigData *operand_entries + = m_parent.get_brig_data_entry (brig_inst.operands); + std::vector<tree> operands; + for (size_t i = 0; i < operand_entries->byteCount / 4; ++i) + { + uint32_t operand_offset = ((const uint32_t *) &operand_entries->bytes)[i]; + const BrigBase *operand_data + = m_parent.get_brig_operand_entry (operand_offset); + + const bool is_output + = gccbrig_hsa_opcode_op_output_p (brig_inst.opcode, i); + + tree operand_type = is_output ? dest_type : src_type; + + bool half_to_float = is_fp16_arith; + + /* Special cases for operand types. */ + if ((brig_inst.opcode == BRIG_OPCODE_SHL + || brig_inst.opcode == BRIG_OPCODE_SHR) + && i == 2) + /* The shift amount is always a scalar. */ + operand_type + = VECTOR_TYPE_P (src_type) ? TREE_TYPE (src_type) : src_type; + else if (brig_inst.opcode == BRIG_OPCODE_SHUFFLE) + { + if (i == 3) + /* HSAIL shuffle inputs the MASK vector as tightly packed bits + while GENERIC VEC_PERM_EXPR expects the mask elements to be + of the same size as the elements in the input vectors. Let's + cast to a scalar type here and convert to the VEC_PERM_EXPR + format in instruction handling. There are no arbitrary bit + width int types in GENERIC so we cannot use the original + vector type. */ + operand_type = uint32_type_node; + else + /* Always treat the element as unsigned ints to avoid + sign extensions/negative offsets with masks, which + are expected to be of the same element type as the + data in VEC_PERM_EXPR. With shuffles the data type + should not matter as it's a "raw operation". */ + operand_type = get_unsigned_int_type (operand_type); + } + else if (brig_inst.opcode == BRIG_OPCODE_PACK) + { + if (i == 1) + operand_type = get_unsigned_int_type (dest_type); + else if (i == 2) + operand_type = get_unsigned_int_type (TREE_TYPE (dest_type)); + else if (i == 3) + operand_type = uint32_type_node; + } + else if (brig_inst.opcode == BRIG_OPCODE_UNPACK && i == 2) + operand_type = uint32_type_node; + else if (brig_inst.opcode == BRIG_OPCODE_SAD && i == 3) + operand_type = uint32_type_node; + else if (brig_inst.opcode == BRIG_OPCODE_CLASS && i == 2) + { + operand_type = uint32_type_node; + half_to_float = false; + } + else if (half_to_float) + /* Treat the operands as the storage type at this point. */ + operand_type = half_storage_type; + + tree operand = build_tree_operand (brig_inst, *operand_data, operand_type, + !is_output); + + gcc_assert (operand); + + /* Cast/convert the inputs to correct types as expected by the GENERIC + opcode instruction. */ + if (!is_output) + { + if (half_to_float) + operand = build_h2f_conversion + (build_reinterpret_cast (half_storage_type, operand)); + else if (TREE_CODE (operand) != LABEL_DECL + && TREE_CODE (operand) != TREE_VEC + && operand_data->kind != BRIG_KIND_OPERAND_ADDRESS + && !VECTOR_TYPE_P (TREE_TYPE (operand))) + { + size_t reg_width = int_size_in_bytes (TREE_TYPE (operand)); + size_t instr_width = int_size_in_bytes (operand_type); + if (reg_width == instr_width) + operand = build_reinterpret_cast (operand_type, operand); + else if (reg_width > instr_width) + { + /* Clip the operand because the instruction's bitwidth + is smaller than the HSAIL reg width. */ + if (INTEGRAL_TYPE_P (operand_type)) + operand + = convert_to_integer (signed_or_unsigned_type_for + (TYPE_UNSIGNED (operand_type), + operand_type), operand); + else + operand = build_reinterpret_cast (operand_type, operand); + } + else if (reg_width < instr_width) + /* At least shift amount operands can be read from smaller + registers than the data operands. */ + operand = convert (operand_type, operand); + } + else if (brig_inst.opcode == BRIG_OPCODE_SHUFFLE) + /* Force the operand type to be treated as the raw type. */ + operand = build_reinterpret_cast (operand_type, operand); + + if (brig_inst.opcode == BRIG_OPCODE_CMOV && i == 1) + { + /* gcc expects the lower bit to be 1 (or all ones in case of + vectors) while CMOV assumes false iff 0. Convert the input + here to what gcc likes by generating + 'operand = operand != 0'. */ + tree cmp_res_type = get_comparison_result_type (operand_type); + operand = build2 (NE_EXPR, cmp_res_type, operand, + build_zero_cst (TREE_TYPE (operand))); + } + + if (ftz) + operand = flush_to_zero (is_fp16_arith) (*this, operand); + } + operands.push_back (operand); + } + return operands; +} + +/* Build the GENERIC for assigning the result of an instruction to the result + "register" (variable). BRIG_INST is the original brig instruction, + OUTPUT the result variable/register, INST_EXPR the one producing the + result. Required bitcasts and fp32 to fp16 conversions are added as + well. */ + +tree +brig_code_entry_handler::build_output_assignment (const BrigInstBase &brig_inst, + tree output, tree inst_expr) +{ + /* The destination type might be different from the output register + variable type (which is always an unsigned integer type). */ + tree output_type = TREE_TYPE (output); + tree input_type = TREE_TYPE (inst_expr); + bool is_fp16 = (brig_inst.type & BRIG_TYPE_BASE_MASK) == BRIG_TYPE_F16 + && brig_inst.base.kind != BRIG_KIND_INST_MEM + && !gccbrig_is_bit_operation (brig_inst.opcode); + + /* Flush to zero. */ + bool ftz = false; + const BrigBase *base = &brig_inst.base; + + if (base->kind == BRIG_KIND_INST_MOD) + { + const BrigInstMod *mod = (const BrigInstMod *) base; + ftz = mod->modifier & BRIG_ALU_FTZ; + } + else if (base->kind == BRIG_KIND_INST_CMP) + { + const BrigInstCmp *cmp = (const BrigInstCmp *) base; + ftz = cmp->modifier & BRIG_ALU_FTZ; + } + + if (TREE_CODE (inst_expr) == CALL_EXPR) + { + tree func_decl = TREE_OPERAND (TREE_OPERAND (inst_expr, 1), 0); + input_type = TREE_TYPE (TREE_TYPE (func_decl)); + } + + if (ftz && (VECTOR_FLOAT_TYPE_P (TREE_TYPE (inst_expr)) + || SCALAR_FLOAT_TYPE_P (TREE_TYPE (inst_expr)) || is_fp16)) + { + /* Ensure we don't duplicate the arithmetics to the arguments of the bit + field reference operators. */ + inst_expr = add_temp_var ("before_ftz", inst_expr); + inst_expr = flush_to_zero (is_fp16) (*this, inst_expr); + } + + if (is_fp16) + { + inst_expr = add_temp_var ("before_f2h", inst_expr); + tree f2h_output = build_f2h_conversion (inst_expr); + tree conv_int = convert_to_integer (output_type, f2h_output); + tree assign = build2 (MODIFY_EXPR, output_type, output, conv_int); + m_parent.m_cf->append_statement (assign); + return assign; + } + else if (VECTOR_TYPE_P (TREE_TYPE (output))) + { + /* Expand/unpack the input value to the given vector elements. */ + size_t i; + tree input = inst_expr; + tree element_type = gccbrig_tree_type_for_hsa_type (brig_inst.type); + tree element; + tree last_assign = NULL_TREE; + FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (output), i, element) + { + tree element_ref + = build3 (BIT_FIELD_REF, element_type, input, + TYPE_SIZE (element_type), + build_int_cst (uint32_type_node, + i * int_size_in_bytes (element_type) + * BITS_PER_UNIT)); + + last_assign + = build_output_assignment (brig_inst, element, element_ref); + } + return last_assign; + } + else + { + /* All we do here is to bitcast the result and store it to the + 'register' (variable). Mainly need to take care of differing + bitwidths. */ + size_t src_width = int_size_in_bytes (input_type); + size_t dst_width = int_size_in_bytes (output_type); + + if (src_width == dst_width) + { + /* A simple bitcast should do. */ + tree bitcast = build_reinterpret_cast (output_type, inst_expr); + tree assign = build2 (MODIFY_EXPR, output_type, output, bitcast); + m_parent.m_cf->append_statement (assign); + return assign; + } + else + { + tree conv_int = convert_to_integer (output_type, inst_expr); + tree assign = build2 (MODIFY_EXPR, output_type, output, conv_int); + m_parent.m_cf->append_statement (assign); + return assign; + } + } + return NULL_TREE; +} + +/* Appends a GENERIC statement (STMT) to the currently constructed function. */ + +void +brig_code_entry_handler::append_statement (tree stmt) +{ + m_parent.m_cf->append_statement (stmt); +} + +/* Unpacks the elements of the vector in VALUE to scalars (bit field + references) in ELEMENTS. */ + +void +brig_code_entry_handler::unpack (tree value, tree_stl_vec &elements) +{ + size_t vec_size = int_size_in_bytes (TREE_TYPE (value)); + size_t element_size + = int_size_in_bytes (TREE_TYPE (TREE_TYPE (value))) * BITS_PER_UNIT; + size_t element_count + = vec_size * BITS_PER_UNIT / element_size; + + tree input_element_type = TREE_TYPE (TREE_TYPE (value)); + + value = add_temp_var ("unpack_input", value); + + for (size_t i = 0; i < element_count; ++i) + { + tree element + = build3 (BIT_FIELD_REF, input_element_type, value, + TYPE_SIZE (input_element_type), + build_int_cst (unsigned_char_type_node, i * element_size)); + + element = add_temp_var ("scalar", element); + elements.push_back (element); + } +} + +/* Pack the elements of the scalars in ELEMENTS to the returned vector. */ + +tree +brig_code_entry_handler::pack (tree_stl_vec &elements) +{ + size_t element_count = elements.size (); + + gcc_assert (element_count > 1); + + tree output_element_type = TREE_TYPE (elements.at (0)); + + vec<constructor_elt, va_gc> *constructor_vals = NULL; + for (size_t i = 0; i < element_count; ++i) + CONSTRUCTOR_APPEND_ELT (constructor_vals, NULL_TREE, elements.at (i)); + + tree vec_type = build_vector_type (output_element_type, element_count); + + /* build_constructor creates a vector type which is not a vector_cst + that requires compile time constant elements. */ + tree vec = build_constructor (vec_type, constructor_vals); + + /* Add a temp variable for readability. */ + tree tmp_var = create_tmp_var (vec_type, "vec_out"); + tree vec_tmp_assign = build2 (MODIFY_EXPR, TREE_TYPE (tmp_var), tmp_var, vec); + m_parent.m_cf->append_statement (vec_tmp_assign); + return tmp_var; +} + +/* Visits the element(s) in the OPERAND, calling HANDLER to each of them. */ + +tree +tree_element_unary_visitor::operator () (brig_code_entry_handler &handler, + tree operand) +{ + if (VECTOR_TYPE_P (TREE_TYPE (operand))) + { + size_t vec_size = int_size_in_bytes (TREE_TYPE (operand)); + size_t element_size = int_size_in_bytes (TREE_TYPE (TREE_TYPE (operand))); + size_t element_count = vec_size / element_size; + + tree input_element_type = TREE_TYPE (TREE_TYPE (operand)); + tree output_element_type = NULL_TREE; + + vec<constructor_elt, va_gc> *constructor_vals = NULL; + for (size_t i = 0; i < element_count; ++i) + { + tree element = build3 (BIT_FIELD_REF, input_element_type, operand, + TYPE_SIZE (input_element_type), + build_int_cst (unsigned_char_type_node, + i * element_size + * BITS_PER_UNIT)); + + tree output = visit_element (handler, element); + output_element_type = TREE_TYPE (output); + + CONSTRUCTOR_APPEND_ELT (constructor_vals, NULL_TREE, output); + } + + tree vec_type = build_vector_type (output_element_type, element_count); + + /* build_constructor creates a vector type which is not a vector_cst + that requires compile time constant elements. */ + tree vec = build_constructor (vec_type, constructor_vals); + + /* Add a temp variable for readability. */ + tree tmp_var = create_tmp_var (vec_type, "vec_out"); + tree vec_tmp_assign + = build2 (MODIFY_EXPR, TREE_TYPE (tmp_var), tmp_var, vec); + handler.append_statement (vec_tmp_assign); + return tmp_var; + } + else + return visit_element (handler, operand); +} + +/* Visits the element pair(s) in the OPERAND0 and OPERAND1, calling HANDLER + to each of them. */ + +tree +tree_element_binary_visitor::operator () (brig_code_entry_handler &handler, + tree operand0, tree operand1) +{ + if (VECTOR_TYPE_P (TREE_TYPE (operand0))) + { + gcc_assert (VECTOR_TYPE_P (TREE_TYPE (operand1))); + size_t vec_size = int_size_in_bytes (TREE_TYPE (operand0)); + size_t element_size + = int_size_in_bytes (TREE_TYPE (TREE_TYPE (operand0))); + size_t element_count = vec_size / element_size; + + tree input_element_type = TREE_TYPE (TREE_TYPE (operand0)); + tree output_element_type = NULL_TREE; + + vec<constructor_elt, va_gc> *constructor_vals = NULL; + for (size_t i = 0; i < element_count; ++i) + { + + tree element0 = build3 (BIT_FIELD_REF, input_element_type, operand0, + TYPE_SIZE (input_element_type), + build_int_cst (unsigned_char_type_node, + i * element_size + * BITS_PER_UNIT)); + + tree element1 = build3 (BIT_FIELD_REF, input_element_type, operand1, + TYPE_SIZE (input_element_type), + build_int_cst (unsigned_char_type_node, + i * element_size + * BITS_PER_UNIT)); + + tree output = visit_element (handler, element0, element1); + output_element_type = TREE_TYPE (output); + + CONSTRUCTOR_APPEND_ELT (constructor_vals, NULL_TREE, output); + } + + tree vec_type = build_vector_type (output_element_type, element_count); + + /* build_constructor creates a vector type which is not a vector_cst + that requires compile time constant elements. */ + tree vec = build_constructor (vec_type, constructor_vals); + + /* Add a temp variable for readability. */ + tree tmp_var = create_tmp_var (vec_type, "vec_out"); + tree vec_tmp_assign + = build2 (MODIFY_EXPR, TREE_TYPE (tmp_var), tmp_var, vec); + handler.append_statement (vec_tmp_assign); + return tmp_var; + } + else + return visit_element (handler, operand0, operand1); +} + +/* Generates GENERIC code that flushes the visited element to zero. */ + +tree +flush_to_zero::visit_element (brig_code_entry_handler &, tree operand) +{ + size_t size = int_size_in_bytes (TREE_TYPE (operand)); + if (size == 4) + { + tree built_in + = (m_fp16) ? builtin_decl_explicit (BUILT_IN_HSAIL_FTZ_F32_F16) : + builtin_decl_explicit (BUILT_IN_HSAIL_FTZ_F32); + + return call_builtin (built_in, 1, float_type_node, float_type_node, + operand); + } + else if (size == 8) + { + return call_builtin (builtin_decl_explicit (BUILT_IN_HSAIL_FTZ_F64), 1, + double_type_node, double_type_node, operand); + } + else + gcc_unreachable (); + return NULL_TREE; +} + +/* Generates GENERIC code that converts a single precision float to half + precision float. */ + +tree +float_to_half::visit_element (brig_code_entry_handler &caller, tree operand) +{ + tree built_in = builtin_decl_explicit (BUILT_IN_HSAIL_F32_TO_F16); + + tree casted_operand = build_reinterpret_cast (uint32_type_node, operand); + + tree call = call_builtin (built_in, 1, uint16_type_node, uint32_type_node, + casted_operand); + tree output + = create_tmp_var (TREE_TYPE (TREE_TYPE (built_in)), "fp16out"); + tree assign = build2 (MODIFY_EXPR, TREE_TYPE (output), output, call); + caller.append_statement (assign); + return output; +} + +/* Generates GENERIC code that converts a half precision float to single + precision float. */ + +tree +half_to_float::visit_element (brig_code_entry_handler &caller, tree operand) +{ + tree built_in = builtin_decl_explicit (BUILT_IN_HSAIL_F16_TO_F32); + tree truncated_source = convert_to_integer (uint16_type_node, operand); + + tree call + = call_builtin (built_in, 1, uint32_type_node, uint16_type_node, + truncated_source); + + tree const_fp32_type + = build_type_variant (brig_to_generic::s_fp32_type, 1, 0); + + tree output = create_tmp_var (const_fp32_type, "fp32out"); + tree casted_result + = build_reinterpret_cast (brig_to_generic::s_fp32_type, call); + + tree assign = build2 (MODIFY_EXPR, TREE_TYPE (output), output, casted_result); + + caller.append_statement (assign); + + return output; +} + +/* Treats the INPUT as SRC_TYPE and sign or zero extends it to DEST_TYPE. */ + +tree +brig_code_entry_handler::extend_int (tree input, tree dest_type, tree src_type) +{ + /* Extend integer conversions according to the destination's + ext mode. First we need to clip the input register to + the possible smaller integer size to ensure the correct sign + bit is extended. */ + tree clipped_input = convert_to_integer (src_type, input); + tree conversion_result; + + if (TYPE_UNSIGNED (src_type)) + conversion_result + = convert_to_integer (unsigned_type_for (dest_type), clipped_input); + else + conversion_result + = convert_to_integer (signed_type_for (dest_type), clipped_input); + + /* Treat the result as unsigned so we do not sign extend to the + register width. For some reason this GENERIC sequence sign + extends to the s register: + + D.1541 = (signed char) s1; + D.1542 = (signed short) D.1541; + s0 = (unsigned int) D.1542 + */ + + /* The converted result is then extended to the target register + width, using the same sign as the destination. */ + return convert_to_integer (dest_type, conversion_result); +} + +/* Returns the integer constant value of the given node. + If it's a cast, looks into the source of the cast. */ +HOST_WIDE_INT +brig_code_entry_handler::int_constant_value (tree node) +{ + tree n = node; + if (TREE_CODE (n) == VIEW_CONVERT_EXPR) + n = TREE_OPERAND (n, 0); + return int_cst_value (n); +} + diff --git a/gcc/brig/brigfrontend/brig-code-entry-handler.h b/gcc/brig/brigfrontend/brig-code-entry-handler.h new file mode 100644 index 00000000000..8e7fbe86823 --- /dev/null +++ b/gcc/brig/brigfrontend/brig-code-entry-handler.h @@ -0,0 +1,425 @@ +/* brig-code-entry-handler.h -- a gccbrig base class + Copyright (C) 2016 Free Software Foundation, Inc. + Contributed by Pekka Jaaskelainen <pekka.jaaskelainen@parmance.com> + for General Processor Tech. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation; either version 3, or (at your option) any later + version. + + GCC is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. */ + +#ifndef GCC_BRIG_CODE_ENTRY_HANDLER_H +#define GCC_BRIG_CODE_ENTRY_HANDLER_H + +#include "brig-to-generic.h" + +#include <map> +#include <vector> + +class tree_element_unary_visitor; + +/* An interface to organize the different types of element handlers + for the code section. */ + +class brig_code_entry_handler : public brig_entry_handler +{ +public: + typedef std::map<std::pair<BrigOpcode16_t, BrigType16_t>, tree> builtin_map; + + brig_code_entry_handler (brig_to_generic &parent); + + /* Handles the brig_code data at the given pointer and adds it to the + currently built tree. Returns the number of consumed bytes. */ + + virtual size_t operator () (const BrigBase *base) = 0; + + void append_statement (tree stmt); + +protected: + + tree get_tree_expr_type_for_hsa_type (BrigType16_t brig_type) const; + tree get_tree_cst_for_hsa_operand (const BrigOperandConstantBytes *brigConst, + tree type) const; + tree get_builtin_for_hsa_opcode (tree type, BrigOpcode16_t brig_opcode, + BrigType16_t brig_type) const; + tree get_comparison_result_type (tree source_type); + + tree build_code_ref (const BrigBase &ref); + + tree build_tree_operand (const BrigInstBase &brig_inst, + const BrigBase &operand, + tree operand_type = NULL_TREE, + bool is_input = false); + + tree build_address_operand (const BrigInstBase &brig_inst, + const BrigOperandAddress &addr_operand); + + tree build_tree_operand_from_brig (const BrigInstBase *brig_inst, + tree operand_type, size_t operand_index); + + tree build_tree_cst_element (BrigType16_t element_type, + const unsigned char *next_data) const; + + bool needs_workitem_context_data (BrigOpcode16_t brig_opcode) const; + + void unpack (tree value, tree_stl_vec &elements); + tree pack (tree_stl_vec &elements); + + bool can_expand_builtin (BrigOpcode16_t brig_opcode) const; + tree expand_builtin (BrigOpcode16_t brig_opcode, tree_stl_vec &operands); + + tree expand_or_call_builtin (BrigOpcode16_t brig_opcode, + BrigType16_t brig_type, tree arith_type, + tree_stl_vec &operands); + + tree add_temp_var (std::string name, tree expr); + + tree build_f2h_conversion (tree source); + tree build_h2f_conversion (tree source); + + tree_stl_vec build_operands (const BrigInstBase &brig_inst); + tree build_output_assignment (const BrigInstBase &brig_inst, tree output, + tree inst_expr); + + tree apply_to_all_elements (tree_element_unary_visitor &visitor, + tree operand); + + HOST_WIDE_INT int_constant_value (tree node); + + tree extend_int (tree input, tree dest_type, tree src_type); + + /* HSAIL-specific builtin functions not yet integrated to gcc. */ + + static builtin_map s_custom_builtins; +}; + +/* Implement the Visitor software pattern for performing various actions on + elements of vector operands. This enables separating the vector element + traversal/extraction/packing code from whatever different actions are + performed to each element. */ + +class tree_element_unary_visitor +{ +public: + tree operator () (brig_code_entry_handler &handler, tree operand); + + /* Performs an action to a single element, which can have originally + been a vector element or a scalar. */ + + virtual tree visit_element (brig_code_entry_handler &handler, tree operand) + = 0; +}; + +class tree_element_binary_visitor +{ +public: + tree operator () (brig_code_entry_handler &handler, tree operand0, + tree operand1); + + /* Performs an action to a pair of elements, which can have originally + been a vector element or a scalar. */ + + virtual tree visit_element (brig_code_entry_handler &handler, tree operand0, + tree operand1) + = 0; +}; + +/* Visitor for flushing float elements to zero. */ + +class flush_to_zero : public tree_element_unary_visitor +{ +public: + flush_to_zero (bool fp16) : m_fp16 (fp16) + { + } + + virtual tree visit_element (brig_code_entry_handler &caller, tree operand); + +private: + + /* True if the value should be flushed according to fp16 limits. */ + + bool m_fp16; +}; + +/* Visitor for converting F16 elements to F32. */ + +class half_to_float : public tree_element_unary_visitor +{ +public: + virtual tree visit_element (brig_code_entry_handler &caller, tree operand); +}; + +/* Visitor for converting F32 elements to F16. */ + +class float_to_half : public tree_element_unary_visitor +{ +public: + virtual tree visit_element (brig_code_entry_handler &caller, tree operand); +}; + +/* A base class for instruction types that support floating point + modifiers. + + operator () delegates to subclasses (template method pattern) in + type specific parts. */ + +class brig_inst_mod_handler : public brig_code_entry_handler +{ +public: + brig_inst_mod_handler (brig_to_generic &parent) + : brig_code_entry_handler (parent) + { + } + + virtual size_t generate (const BrigBase *base); + virtual const BrigAluModifier8_t *modifier (const BrigBase *base) const; + virtual const BrigRound8_t *round (const BrigBase *base) const; + + size_t operator () (const BrigBase *base); +}; + +class brig_directive_function_handler : public brig_code_entry_handler +{ +public: + brig_directive_function_handler (brig_to_generic &parent) + : brig_code_entry_handler (parent) + { + } + size_t operator () (const BrigBase *base); +}; + +class brig_directive_control_handler : public brig_code_entry_handler +{ +public: + brig_directive_control_handler (brig_to_generic &parent) + : brig_code_entry_handler (parent) + { + } + + size_t operator () (const BrigBase *base); +}; + +class brig_directive_variable_handler : public brig_code_entry_handler +{ +public: + brig_directive_variable_handler (brig_to_generic &parent) + : brig_code_entry_handler (parent) + { + } + + size_t operator () (const BrigBase *base); + + tree build_variable (const BrigDirectiveVariable *brigVar, + tree_code var_decltype = VAR_DECL); + + size_t get_brig_var_alignment (const BrigDirectiveVariable *brigVar); +}; + +class brig_directive_fbarrier_handler : public brig_code_entry_handler +{ +public: + brig_directive_fbarrier_handler (brig_to_generic &parent) + : brig_code_entry_handler (parent) + { + } + + size_t operator () (const BrigBase *base); +}; + +class brig_directive_label_handler : public brig_code_entry_handler +{ +public: + brig_directive_label_handler (brig_to_generic &parent) + : brig_code_entry_handler (parent) + { + } + + size_t operator () (const BrigBase *base); +}; + +class brig_directive_comment_handler : public brig_code_entry_handler +{ +public: + brig_directive_comment_handler (brig_to_generic &parent) + : brig_code_entry_handler (parent) + { + } + + size_t operator () (const BrigBase *base); +}; + +class brig_directive_arg_block_handler : public brig_code_entry_handler +{ +public: + brig_directive_arg_block_handler (brig_to_generic &parent) + : brig_code_entry_handler (parent) + { + } + + size_t operator () (const BrigBase *base); +}; + +class brig_basic_inst_handler : public brig_code_entry_handler +{ +public: + brig_basic_inst_handler (brig_to_generic &parent); + + size_t operator () (const BrigBase *base); + +private: + tree build_lower_element_broadcast (tree vec_operand); + + bool must_be_scalarized (const BrigInstBase *brig_inst, + tree instr_type) const; + + tree build_inst_expr (BrigOpcode16_t brig_opcode, BrigType16_t brig_type, + tree arith_type, tree_stl_vec &operands); + + tree build_shuffle (tree arith_type, tree_stl_vec &operands); + tree build_unpack (tree_stl_vec &operands); + tree build_pack (tree_stl_vec &operands); + + tree build_unpack_lo_or_hi (BrigOpcode16_t brig_opcode, tree arith_type, + tree_stl_vec &operands); + + tree_code get_tree_code_for_hsa_opcode (BrigOpcode16_t brig_opcode, + BrigType16_t brig_type) const; +}; + +class brig_cvt_inst_handler : public brig_inst_mod_handler +{ +public: + brig_cvt_inst_handler (brig_to_generic &parent) + : brig_inst_mod_handler (parent) + { + } + + virtual size_t generate (const BrigBase *base); + virtual const BrigAluModifier8_t *modifier (const BrigBase *base) const; + virtual const BrigRound8_t *round (const BrigBase *base) const; +}; + +class brig_branch_inst_handler : public brig_code_entry_handler +{ +public: + brig_branch_inst_handler (brig_to_generic &parent) + : brig_code_entry_handler (parent) + { + } + + size_t operator () (const BrigBase *base); +}; + +class brig_mem_inst_handler : public brig_code_entry_handler +{ +public: + brig_mem_inst_handler (brig_to_generic &parent) + : brig_code_entry_handler (parent) + { + } + + size_t operator () (const BrigBase *base); + +private: + tree build_mem_access (const BrigInstBase *brig_inst, tree addr, tree data); +}; + +class brig_copy_move_inst_handler : public brig_code_entry_handler +{ +public: + brig_copy_move_inst_handler (brig_to_generic &parent) + : brig_code_entry_handler (parent) + { + } + + size_t operator () (const BrigBase *base); + +private: + size_t handle_lda (const BrigInstBase *base); +}; + +class brig_atomic_inst_handler : public brig_code_entry_handler +{ +private: + typedef std::map<std::string, tree> atomic_builtins_map; + +public: + brig_atomic_inst_handler (brig_to_generic &parent); + + size_t operator () (const BrigBase *base); + +protected: + size_t generate_tree (const BrigInstBase &inst, + BrigAtomicOperation8_t atomic_opcode); +}; + +class brig_signal_inst_handler : public brig_atomic_inst_handler +{ +public: + brig_signal_inst_handler (brig_to_generic &parent) + : brig_atomic_inst_handler (parent) + { + } + size_t operator () (const BrigBase *base); +}; + +class brig_cmp_inst_handler : public brig_code_entry_handler +{ +public: + brig_cmp_inst_handler (brig_to_generic &parent) + : brig_code_entry_handler (parent) + { + } + + size_t operator () (const BrigBase *base); +}; + +class brig_seg_inst_handler : public brig_code_entry_handler +{ +public: + brig_seg_inst_handler (brig_to_generic &parent); + + size_t operator () (const BrigBase *base); +}; + +class brig_lane_inst_handler : public brig_code_entry_handler +{ +public: + brig_lane_inst_handler (brig_to_generic &parent); + + size_t operator () (const BrigBase *base); +}; + +class brig_queue_inst_handler : public brig_code_entry_handler +{ +public: + brig_queue_inst_handler (brig_to_generic &parent); + + size_t operator () (const BrigBase *base); +}; + +class brig_directive_module_handler : public brig_code_entry_handler +{ +public: + brig_directive_module_handler (brig_to_generic &parent) + : brig_code_entry_handler (parent) + { + } + + size_t operator () (const BrigBase *base); +}; + + +#endif diff --git a/gcc/brig/brigfrontend/brig-comment-handler.cc b/gcc/brig/brigfrontend/brig-comment-handler.cc new file mode 100644 index 00000000000..61a187fade1 --- /dev/null +++ b/gcc/brig/brigfrontend/brig-comment-handler.cc @@ -0,0 +1,38 @@ +/* brig-comment-handler.cc -- brig comment directive handling + Copyright (C) 2016 Free Software Foundation, Inc. + Contributed by Pekka Jaaskelainen <pekka.jaaskelainen@parmance.com> + for General Processor Tech. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +#include "brig-code-entry-handler.h" + +extern int gccbrig_verbose; + +size_t +brig_directive_comment_handler::operator () (const BrigBase *base) +{ + const BrigDirectiveComment *brig_comment + = (const BrigDirectiveComment *) base; + + if (gccbrig_verbose) + { + std::string cmnt = m_parent.get_string (brig_comment->name); + fprintf (stderr, "brig: Comment: '%s'\n", cmnt.c_str()); + } + return base->byteCount; +} diff --git a/gcc/brig/brigfrontend/brig-control-handler.cc b/gcc/brig/brigfrontend/brig-control-handler.cc new file mode 100644 index 00000000000..2e4f5e36a77 --- /dev/null +++ b/gcc/brig/brigfrontend/brig-control-handler.cc @@ -0,0 +1,108 @@ +/* brig-control-handler.cc -- brig control directive handling + Copyright (C) 2016 Free Software Foundation, Inc. + Contributed by Pekka Jaaskelainen <pekka.jaaskelainen@parmance.com> + for General Processor Tech. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +#include "brig-code-entry-handler.h" +#include "diagnostic.h" +#include "print-tree.h" + +size_t +brig_directive_control_handler::operator () (const BrigBase *base) +{ + const BrigDirectiveControl *inst = (const BrigDirectiveControl *) base; + const BrigData *operand_entries + = m_parent.get_brig_data_entry (inst->operands); + + /* Parse the constant integer operands. */ + std::vector<tree> operands; + for (size_t i = 0; i < operand_entries->byteCount / 4; ++i) + { + uint32_t operand_offset + = ((const uint32_t *) &operand_entries->bytes)[i]; + const BrigBase *operand_data + = m_parent.get_brig_operand_entry (operand_offset); + + tree operand_type + = (inst->control == BRIG_CONTROL_REQUIREDGRIDSIZE + || inst->control == BRIG_CONTROL_MAXFLATGRIDSIZE) ? + uint64_type_node : uint32_type_node; + operands.push_back + (build_tree_operand (*(const BrigInstBase*)inst, *operand_data, + operand_type)); + } + + switch (inst->control) + { + case BRIG_CONTROL_MAXDYNAMICGROUPSIZE: + { + m_parent.m_cf->m_descriptor.max_dynamic_group_size + = int_constant_value (operands.at (0)); + break; + } + case BRIG_CONTROL_MAXFLATGRIDSIZE: + { + m_parent.m_cf->m_descriptor.max_flat_grid_size + = int_constant_value (operands.at (0)); + break; + } + case BRIG_CONTROL_MAXFLATWORKGROUPSIZE: + { + m_parent.m_cf->m_descriptor.max_flat_workgroup_size + = int_constant_value (operands.at (0)); + break; + } + case BRIG_CONTROL_REQUIREDDIM: + { + m_parent.m_cf->m_descriptor.required_dim + = int_constant_value (operands.at (0)); + break; + } + case BRIG_CONTROL_REQUIREDGRIDSIZE: + { + m_parent.m_cf->m_descriptor.required_grid_size[0] + = int_constant_value (operands.at (0)); + m_parent.m_cf->m_descriptor.required_grid_size[1] + = int_constant_value (operands.at (1)); + m_parent.m_cf->m_descriptor.required_grid_size[2] + = int_constant_value (operands.at (2)); + break; + } + case BRIG_CONTROL_REQUIREDWORKGROUPSIZE: + { + m_parent.m_cf->m_descriptor.required_workgroup_size[0] + = int_constant_value (operands.at (0)); + m_parent.m_cf->m_descriptor.required_workgroup_size[1] + = int_constant_value (operands.at (1)); + m_parent.m_cf->m_descriptor.required_workgroup_size[2] + = int_constant_value (operands.at (2)); + break; + } + case BRIG_CONTROL_REQUIRENOPARTIALWORKGROUPS: + /* Performance hint only, ignored for now. */ + break; + case BRIG_CONTROL_ENABLEBREAKEXCEPTIONS: + case BRIG_CONTROL_ENABLEDETECTEXCEPTIONS: + /* Unimplemented. */ + break; + default: + sorry ("Unsupported control directive %x.\n", inst->control); + } + return base->byteCount; +} diff --git a/gcc/brig/brigfrontend/brig-copy-move-inst-handler.cc b/gcc/brig/brigfrontend/brig-copy-move-inst-handler.cc new file mode 100644 index 00000000000..08ff26d9282 --- /dev/null +++ b/gcc/brig/brigfrontend/brig-copy-move-inst-handler.cc @@ -0,0 +1,73 @@ +/* brig-copy-move-inst-handler.cc -- brig copy/move instruction handling + Copyright (C) 2016 Free Software Foundation, Inc. + Contributed by Pekka Jaaskelainen <pekka.jaaskelainen@parmance.com> + for General Processor Tech. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation; either version 3, or (at your option) any later + version. + + GCC is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. */ + +#include "brig-code-entry-handler.h" +#include "tree-pretty-print.h" +#include "print-tree.h" +#include "errors.h" +#include "brig-util.h" + +size_t +brig_copy_move_inst_handler::handle_lda (const BrigInstBase *brig_inst) +{ + tree dest_type = gccbrig_tree_type_for_hsa_type (brig_inst->type); + + tree input = build_tree_operand_from_brig (brig_inst, NULL, 1); + tree output = build_tree_operand_from_brig (brig_inst, dest_type, 0); + + build_output_assignment (*brig_inst, output, input); + return brig_inst->base.byteCount; +} + +size_t +brig_copy_move_inst_handler::operator () (const BrigBase *base) +{ + const BrigInstBase *brig_inst + = (const BrigInstBase *) &((const BrigInstBasic *) base)->base; + + if (brig_inst->opcode == BRIG_OPCODE_LDA) + return handle_lda (brig_inst); + + const BrigInstSourceType *inst_src_type = (const BrigInstSourceType *) base; + + tree source_type = gccbrig_tree_type_for_hsa_type (inst_src_type->sourceType); + tree dest_type = gccbrig_tree_type_for_hsa_type (brig_inst->type); + + tree input = build_tree_operand_from_brig (brig_inst, source_type, 1); + tree output = build_tree_operand_from_brig (brig_inst, dest_type, 0); + if (brig_inst->opcode == BRIG_OPCODE_COMBINE) + { + /* For combine, a simple reinterpret cast from the array constructor + works. */ + + tree casted = build_reinterpret_cast (dest_type, input); + tree assign = build2 (MODIFY_EXPR, TREE_TYPE (output), output, casted); + m_parent.m_cf->append_statement (assign); + } + else if (brig_inst->opcode == BRIG_OPCODE_EXPAND) + build_output_assignment (*brig_inst, output, input); + else + { + brig_basic_inst_handler basic (m_parent); + return basic (base); + } + return base->byteCount; +} diff --git a/gcc/brig/brigfrontend/brig-cvt-inst-handler.cc b/gcc/brig/brigfrontend/brig-cvt-inst-handler.cc new file mode 100644 index 00000000000..a5b16197793 --- /dev/null +++ b/gcc/brig/brigfrontend/brig-cvt-inst-handler.cc @@ -0,0 +1,260 @@ +/* brig-cvt-inst-handler.cc -- brig cvt (convert) instruction handling + Copyright (C) 2016 Free Software Foundation, Inc. + Contributed by Pekka Jaaskelainen <pekka.jaaskelainen@parmance.com> + for General Processor Tech. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation; either version 3, or (at your option) any later + version. + + GCC is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. */ + +#include <sstream> + +#include "brig-code-entry-handler.h" + +#include "gimple-expr.h" +#include "errors.h" +#include "convert.h" +#include "tree-pretty-print.h" +#include "print-tree.h" +#include "diagnostic-core.h" +#include "brig-util.h" + +const BrigAluModifier8_t * +brig_cvt_inst_handler::modifier (const BrigBase *base) const +{ + const BrigInstCvt *inst = (const BrigInstCvt *) base; + return &inst->modifier; +} + +const BrigRound8_t * +brig_cvt_inst_handler::round (const BrigBase *base) const +{ + const BrigInstCvt *inst = (const BrigInstCvt *) base; + return &inst->round; +} + +size_t +brig_cvt_inst_handler::generate (const BrigBase *base) +{ + /* In cvt instructions there can be at least four data types involved: + + - the input register type + - the output register type + - the conversion source type + - the conversion destination type + */ + + const BrigInstBase *brig_inst + = (const BrigInstBase *) &((const BrigInstBasic *) base)->base; + const BrigInstCvt *cvt_inst = (const BrigInstCvt *) base; + + const BrigAluModifier8_t *inst_modifier = modifier (base); + const bool FTZ = inst_modifier != NULL && (*inst_modifier) & BRIG_ALU_FTZ; + + /* The conversion source type. */ + tree src_type = get_tree_expr_type_for_hsa_type (cvt_inst->sourceType); + + bool src_is_fp16 = cvt_inst->sourceType == BRIG_TYPE_F16; + + /* The conversion destination type. */ + tree dest_type = gccbrig_tree_type_for_hsa_type (brig_inst->type); + + bool dest_is_fp16 = brig_inst->type == BRIG_TYPE_F16; + + if (!dest_type || !src_type) + { + gcc_unreachable (); + return base->byteCount; + } + + tree_stl_vec operands = build_operands (*brig_inst); + tree &input = operands.at (1); + tree &output = operands.at (0); + + size_t conv_src_size = int_size_in_bytes (src_type); + size_t conv_dst_size = int_size_in_bytes (dest_type); + size_t src_reg_size = int_size_in_bytes (TREE_TYPE (input)); + + /* The input register can be of different type&size than the + conversion input size. First cast the input to the conversion + input type. These casts are always bitcasts which can be + expressed as casts between different unsigned integers. */ + if (src_reg_size != conv_src_size) + { + tree unsigned_int_type = NULL_TREE; + if (INTEGRAL_TYPE_P (src_type)) + unsigned_int_type = unsigned_type_for (src_type); + else /* Find a matching size int type for the REAL type. */ + { + if (conv_src_size == 2) + unsigned_int_type = gccbrig_tree_type_for_hsa_type (BRIG_TYPE_U16); + else if (conv_src_size == 4) + unsigned_int_type = gccbrig_tree_type_for_hsa_type (BRIG_TYPE_U32); + else if (conv_src_size == 8) + unsigned_int_type = gccbrig_tree_type_for_hsa_type (BRIG_TYPE_U64); + else + gcc_unreachable (); + } + input = convert_to_integer (unsigned_int_type, input); + } + + if (src_is_fp16) + input = build_h2f_conversion (input); + + /* Flush the float operand to zero if indicated with 'ftz'. */ + if (FTZ && SCALAR_FLOAT_TYPE_P (src_type)) + { + tree casted_input = build_reinterpret_cast (src_type, input); + input = flush_to_zero (src_is_fp16) (*this, casted_input); + } + + tree conversion_result = NULL_TREE; + if (brig_inst->type == BRIG_TYPE_B1) + { + /* When the destination is b1, cvt does a 'ztest' operation which is + defined as a != 0 for integers and similarly (!= 0.0f) for floats. */ + if (INTEGRAL_TYPE_P (src_type)) + { + /* Generate an integer not equal operation. */ + conversion_result = build2 (NE_EXPR, TREE_TYPE (input), input, + build_int_cst (TREE_TYPE (input), 0)); + } + else + { + /* For REAL source types, ztest returns 1 if the value is not +- 0.0f. + We can perform this check with an integer comparison after + masking away the sign bit from a correct position. This is safer + than using absf because of exceptions in case of a NaN + input (NaN exceptions are not generated with cvt). */ + tree unsigned_int_type = NULL_TREE; + /* Bit battern with all but the upper bit 1. */ + tree and_mask = NULL_TREE; + if (conv_src_size == 2) + { + unsigned_int_type = gccbrig_tree_type_for_hsa_type (BRIG_TYPE_U16); + and_mask = build_int_cst (unsigned_int_type, 0x7FFF); + } + else if (conv_src_size == 4) + { + unsigned_int_type = gccbrig_tree_type_for_hsa_type (BRIG_TYPE_U32); + and_mask = build_int_cst (unsigned_int_type, 0x7FFFFFFF); + } + else if (conv_src_size == 8) + { + unsigned_int_type = gccbrig_tree_type_for_hsa_type (BRIG_TYPE_U64); + and_mask = build_int_cst (unsigned_int_type, 0x7FFFFFFFFFFFFFFF); + } + else + gcc_unreachable (); + tree casted_input = build_reinterpret_cast (unsigned_int_type, input); + tree masked_input + = build2 (BIT_AND_EXPR, unsigned_int_type, casted_input, and_mask); + conversion_result + = build2 (NE_EXPR, TREE_TYPE (masked_input), masked_input, + build_int_cst (unsigned_int_type, 0)); + } + /* The result from the comparison is a boolean, convert it to such. */ + conversion_result + = convert_to_integer (gccbrig_tree_type_for_hsa_type (BRIG_TYPE_B1), + conversion_result); + } + else if (dest_is_fp16) + { + tree casted_input = build_reinterpret_cast (src_type, input); + conversion_result + = convert_to_real (brig_to_generic::s_fp32_type, casted_input); + if (FTZ) + conversion_result = flush_to_zero (true) (*this, conversion_result); + conversion_result = build_f2h_conversion (conversion_result); + } + else if (SCALAR_FLOAT_TYPE_P (dest_type)) + { + tree casted_input = build_reinterpret_cast (src_type, input); + conversion_result = convert_to_real (dest_type, casted_input); + } + else if (INTEGRAL_TYPE_P (dest_type) && INTEGRAL_TYPE_P (src_type)) + { + conversion_result = extend_int (input, dest_type, src_type); + } + else if (INTEGRAL_TYPE_P (dest_type) && SCALAR_FLOAT_TYPE_P (src_type)) + { + + if (cvt_inst->round == BRIG_ROUND_INTEGER_ZERO_SAT) + { + + /* Use builtins for the saturating conversions. */ +#undef DEF_HSAIL_SAT_BUILTIN +#undef DEF_HSAIL_BUILTIN +#undef DEF_HSAIL_ATOMIC_BUILTIN +#undef DEF_HSAIL_INTR_BUILTIN +#undef DEF_HSAIL_CVT_ZEROI_SAT_BUILTIN + + tree builtin = NULL_TREE; + BrigType16_t src_arith_type + = src_is_fp16 + ? (BrigType16_t) BRIG_TYPE_F32 : cvt_inst->sourceType; +#define DEF_HSAIL_CVT_ZEROI_SAT_BUILTIN(ENUM, HSAIL_DST_TYPE, HSAIL_SRC_TYPE, \ + NAME, TYPE, ATTRS) \ + if (brig_inst->type == HSAIL_DST_TYPE \ + && src_arith_type == HSAIL_SRC_TYPE) \ + builtin = builtin_decl_explicit (ENUM); \ + else +#include "brig-builtins.def" + gcc_unreachable (); + + tree casted_input = build_reinterpret_cast (src_type, input); + conversion_result + = call_builtin (builtin, 1, dest_type, src_type, casted_input); + } + else + { + tree casted_input = build_reinterpret_cast (src_type, input); + + /* Perform the int to float conversion. */ + conversion_result = convert_to_integer (dest_type, casted_input); + } + /* The converted result is finally extended to the target register + width, using the same sign as the destination. */ + conversion_result + = convert_to_integer (TREE_TYPE (output), conversion_result); + } + else + { + /* Just use CONVERT_EXPR and hope for the best. */ + tree casted_input = build_reinterpret_cast (dest_type, input); + conversion_result = build1 (CONVERT_EXPR, dest_type, casted_input); + } + + size_t dst_reg_size = int_size_in_bytes (TREE_TYPE (output)); + + tree assign = NULL_TREE; + /* The output register can be of different type&size than the + conversion output size. Cast it to the register variable type. */ + if (dst_reg_size > conv_dst_size) + { + tree casted_output + = build1 (CONVERT_EXPR, TREE_TYPE (output), conversion_result); + assign = build2 (MODIFY_EXPR, TREE_TYPE (output), output, casted_output); + } + else + { + tree casted_output + = build_reinterpret_cast (TREE_TYPE (output), conversion_result); + assign = build2 (MODIFY_EXPR, TREE_TYPE (output), output, casted_output); + } + m_parent.m_cf->append_statement (assign); + + return base->byteCount; +} diff --git a/gcc/brig/brigfrontend/brig-fbarrier-handler.cc b/gcc/brig/brigfrontend/brig-fbarrier-handler.cc new file mode 100644 index 00000000000..b236885d164 --- /dev/null +++ b/gcc/brig/brigfrontend/brig-fbarrier-handler.cc @@ -0,0 +1,44 @@ +/* brig-fbarrier-handler.cc -- brig fbarrier directive handling + Copyright (C) 2016 Free Software Foundation, Inc. + Contributed by Pekka Jaaskelainen <pekka.jaaskelainen@parmance.com> + for General Processor Tech. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +#include "brig-code-entry-handler.h" + +#include "stringpool.h" +#include "errors.h" + +/* Allocate this many bytes from the group segment for each fbarrier. */ +#define FBARRIER_STRUCT_SIZE 32 + +size_t +brig_directive_fbarrier_handler::operator () (const BrigBase *base) +{ + /* Model fbarriers as group segment variables with fixed size + large enough to store whatever data the actual target needs + to store to maintain the barrier info. The handle is the + offset to the beginning of the object. */ + + const BrigDirectiveFbarrier* fbar = (const BrigDirectiveFbarrier*)base; + if (m_parent.m_cf != NULL) + m_parent.m_cf->m_function_scope_vars.insert (base); + std::string var_name = m_parent.get_mangled_name (fbar); + m_parent.append_group_variable (var_name, FBARRIER_STRUCT_SIZE, 1); + return base->byteCount; +} diff --git a/gcc/brig/brigfrontend/brig-function-handler.cc b/gcc/brig/brigfrontend/brig-function-handler.cc new file mode 100644 index 00000000000..4e05680872b --- /dev/null +++ b/gcc/brig/brigfrontend/brig-function-handler.cc @@ -0,0 +1,374 @@ +/* brig-code-entry-handler.cc -- brig function directive handling + Copyright (C) 2016 Free Software Foundation, Inc. + Contributed by Pekka Jaaskelainen <pekka.jaaskelainen@parmance.com> + for General Processor Tech. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation; either version 3, or (at your option) any later + version. + + GCC is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. */ + +#include <sstream> +#include <iomanip> + +#include "brig-code-entry-handler.h" + +#include "brig-machine.h" +#include "stringpool.h" +#include "tree-iterator.h" +#include "gimple-expr.h" +#include "function.h" +#include "phsa.h" + +#include "tree-pretty-print.h" +#include "print-tree.h" + +extern int gccbrig_verbose; + +size_t +brig_directive_function_handler::operator () (const BrigBase *base) +{ + m_parent.finish_function (); + + size_t bytes_consumed = base->byteCount; + + const BrigDirectiveExecutable *exec = (const BrigDirectiveExecutable *) base; + + if (gccbrig_verbose) + { + printf ("brig: function name %s\n", + m_parent.get_string (exec->name).c_str()); + printf ("brig: inargs %d outargs %d name offset %d\n", exec->inArgCount, + exec->outArgCount, exec->name); + } + + const bool is_definition + = exec->modifier & BRIG_EXECUTABLE_DEFINITION; + + const bool is_kernel = base->kind == BRIG_KIND_DIRECTIVE_KERNEL; + + /* There doesn't seem to be actual use cases for kernel declarations + as they cannot be called by the program. Ignore them until there's + a reason not to. */ + if (is_kernel && !is_definition) + return bytes_consumed; + + m_parent.m_cf = new brig_function (exec, &m_parent); + + std::string func_name = m_parent.get_mangled_name (exec); + + tree fndecl; + tree ret_value = NULL_TREE; + + tree stmt_list = alloc_stmt_list (); + + /* Add a function scope BIND_EXPR using which we can push local variables that + represent HSAIL registers. */ + tree bind_expr = build3 (BIND_EXPR, void_type_node, NULL, stmt_list, NULL); + + if (is_kernel) + { + /* The generated kernel function is not the one that should be + called by the host. */ + func_name = std::string ("_") + func_name; + + tree name_identifier + = get_identifier_with_length (func_name.c_str (), func_name.size ()); + + /* The generated kernel functions take the following arguments: + + 1) a char* which is a starting address of the argument segment where + the call's arguments are stored by the launcher. + 2) a void* parameter that points to a phsail-finalizer context object + which passes the hsa kernel packet etc. + 3) a void* parameter that contains the first flat address of the group + region allocated to the current work-group. */ + + tree char_ptr_type_node = build_pointer_type (char_type_node); + fndecl = build_decl (UNKNOWN_LOCATION, FUNCTION_DECL, name_identifier, + build_function_type_list (void_type_node, + char_ptr_type_node, + ptr_type_node, + ptr_type_node, NULL_TREE)); + + SET_DECL_ASSEMBLER_NAME (fndecl, name_identifier); + + tree resdecl + = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, void_type_node); + + tree typelist = TYPE_ARG_TYPES (TREE_TYPE (fndecl)); + tree argtype = TREE_VALUE (typelist); + TYPE_ADDR_SPACE (argtype) + = gccbrig_get_target_addr_space_id (BRIG_SEGMENT_KERNARG); + + tree arg_arg = build_decl (UNKNOWN_LOCATION, PARM_DECL, + get_identifier ("__args"), char_ptr_type_node); + DECL_ARGUMENTS (fndecl) = arg_arg; + DECL_ARG_TYPE (arg_arg) = char_ptr_type_node; + DECL_CONTEXT (arg_arg) = fndecl; + DECL_ARTIFICIAL (arg_arg) = 1; + TREE_READONLY (arg_arg) = 1; + TREE_USED (arg_arg) = 1; + + DECL_RESULT (fndecl) = resdecl; + DECL_CONTEXT (resdecl) = fndecl; + DECL_EXTERNAL (fndecl) = 0; + } + else + { + /* Build a regular function fingerprint to enable targets to optimize + the calling convention as they see fit. */ + tree name_identifier + = get_identifier_with_length (func_name.c_str (), func_name.size ()); + + m_parent.m_cf->m_arg_variables.clear (); + + brig_directive_variable_handler arg_handler (m_parent); + + vec<tree, va_gc> *args; + vec_alloc (args, 4); + + tree arg_decls = NULL_TREE; + + tree ret_type = void_type_node; + if (exec->outArgCount == 1) + { + /* The return value variable should be the first entry after the + function directive. */ + const BrigBase *retval + = (const BrigBase *) ((const char *) base + base->byteCount); + gcc_assert (retval->kind == BRIG_KIND_DIRECTIVE_VARIABLE); + + const BrigDirectiveVariable *brigVar + = (const BrigDirectiveVariable *) retval; + + brig_directive_variable_handler varhandler (m_parent); + + if (brigVar->type & BRIG_TYPE_ARRAY) + { + /* Push array output arguments to the beginning of the + function argument list instead of regular function + return values. */ + + tree arg_var = varhandler.build_variable (brigVar, PARM_DECL); + vec_safe_push (args, TREE_TYPE (arg_var)); + + m_parent.m_cf->add_arg_variable (brigVar, arg_var); + + if (arg_decls == NULL_TREE) + arg_decls = arg_var; + else + chainon (arg_decls, arg_var); + + m_parent.m_cf->add_arg_variable (brigVar, arg_var); + + ret_value = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, + void_type_node); + } + else + { + ret_value = varhandler.build_variable (brigVar, RESULT_DECL); + m_parent.m_cf->m_ret_value = ret_value; + ret_type = TREE_TYPE (ret_value); + m_parent.m_cf->m_ret_value_brig_var = brigVar; + } + bytes_consumed += retval->byteCount; + } + else + ret_value = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, + void_type_node); + + TREE_ADDRESSABLE (ret_value) = 1; + + if (exec->inArgCount > 0) + { + uint32_t arg_offset = exec->firstInArg; + for (size_t arg = 0; arg < exec->inArgCount; ++arg) + { + + const BrigDirectiveVariable *brigVar + = (const BrigDirectiveVariable *) m_parent.get_brig_code_entry + (arg_offset); + + gcc_assert (brigVar->base.kind == BRIG_KIND_DIRECTIVE_VARIABLE); + + /* Delegate to the brig_directive_variable_handler. */ + brig_directive_variable_handler varhandler (m_parent); + tree arg_var = varhandler.build_variable (brigVar, PARM_DECL); + arg_offset += brigVar->base.byteCount; + vec_safe_push (args, TREE_TYPE (arg_var)); + + m_parent.m_cf->add_arg_variable (brigVar, arg_var); + + if (arg_decls == NULL_TREE) + arg_decls = arg_var; + else + chainon (arg_decls, arg_var); + } + } + + vec_safe_push (args, ptr_type_node); + vec_safe_push (args, ptr_type_node); + + fndecl = build_decl (UNKNOWN_LOCATION, FUNCTION_DECL, name_identifier, + build_function_type_vec (ret_type, args)); + + DECL_RESULT (fndecl) = ret_value; + DECL_CONTEXT (ret_value) = fndecl; + DECL_EXTERNAL (fndecl) = 0; + DECL_ARGUMENTS (fndecl) = arg_decls; + } + + /* All functions need the hidden __context argument passed on + because they might call WI-specific functions which need + the context info. */ + tree context_arg = build_decl (UNKNOWN_LOCATION, PARM_DECL, + get_identifier ("__context"), ptr_type_node); + if (DECL_ARGUMENTS (fndecl) == NULL_TREE) + DECL_ARGUMENTS (fndecl) = context_arg; + else + chainon (DECL_ARGUMENTS (fndecl), context_arg); + DECL_CONTEXT (context_arg) = fndecl; + DECL_ARG_TYPE (context_arg) = ptr_type_node; + DECL_ARTIFICIAL (context_arg) = 1; + TREE_READONLY (context_arg) = 1; + TREE_USED (context_arg) = 1; + + /* They can also access group memory, so we need to pass the + group pointer along too. */ + tree group_base_arg + = build_decl (UNKNOWN_LOCATION, PARM_DECL, + get_identifier ("__group_base_addr"), ptr_type_node); + chainon (DECL_ARGUMENTS (fndecl), group_base_arg); + DECL_ARG_TYPE (group_base_arg) = ptr_type_node; + DECL_CONTEXT (group_base_arg) = fndecl; + DECL_ARTIFICIAL (group_base_arg) = 1; + TREE_READONLY (group_base_arg) = 1; + TREE_USED (group_base_arg) = 1; + + /* Same for private. */ + tree private_base_arg + = build_decl (UNKNOWN_LOCATION, PARM_DECL, + get_identifier ("__private_base_addr"), ptr_type_node); + chainon (DECL_ARGUMENTS (fndecl), private_base_arg); + DECL_ARG_TYPE (private_base_arg) = ptr_type_node; + DECL_CONTEXT (private_base_arg) = fndecl; + DECL_ARTIFICIAL (private_base_arg) = 1; + TREE_READONLY (private_base_arg) = 1; + TREE_USED (private_base_arg) = 1; + + DECL_SAVED_TREE (fndecl) = bind_expr; + + /* Try to preserve the functions across IPA. */ + DECL_PRESERVE_P (fndecl) = 1; + TREE_SIDE_EFFECTS (fndecl) = 1; + + TREE_ADDRESSABLE (fndecl) = 1; + + if (base->kind == BRIG_KIND_DIRECTIVE_FUNCTION) + { + TREE_STATIC (fndecl) = 1; + TREE_PUBLIC (fndecl) = 1; + } + else if (base->kind == BRIG_KIND_DIRECTIVE_KERNEL) + { + TREE_STATIC (fndecl) = 1; + TREE_PUBLIC (fndecl) = 1; + } + else if (base->kind == BRIG_KIND_DIRECTIVE_SIGNATURE) + { + TREE_STATIC (fndecl) = 0; + TREE_PUBLIC (fndecl) = 1; + DECL_EXTERNAL (fndecl) = 1; + } + else if (base->kind == BRIG_KIND_DIRECTIVE_INDIRECT_FUNCTION) + { + TREE_STATIC (fndecl) = 0; + TREE_PUBLIC (fndecl) = 1; + } + else + gcc_unreachable (); + + TREE_USED (fndecl) = 1; + DECL_ARTIFICIAL (fndecl) = 0; + + tree initial_block = make_node (BLOCK); + DECL_INITIAL (fndecl) = initial_block; + TREE_USED (DECL_INITIAL (fndecl)) = 1; + + if (ret_value != NULL_TREE && TREE_TYPE (ret_value) != void_type_node) + { + DECL_CONTEXT (ret_value) = fndecl; + DECL_CHAIN (ret_value) = BIND_EXPR_VARS (bind_expr); + BIND_EXPR_VARS (bind_expr) = ret_value; + } + + tree arg; + for (arg = DECL_ARGUMENTS (fndecl); arg != NULL_TREE; arg = TREE_CHAIN (arg)) + { + DECL_CONTEXT (arg) = fndecl; + DECL_ARG_TYPE (arg) = TREE_TYPE (arg); + } + + m_parent.add_function_decl (func_name, fndecl); + m_parent.append_global (fndecl); + + if (!is_definition) + return bytes_consumed; + + m_parent.start_function (fndecl); + + m_parent.m_cf->m_name = func_name; + m_parent.m_cf->m_func_decl = fndecl; + m_parent.m_cf->m_current_bind_expr = bind_expr; + m_parent.m_cf->m_is_kernel = is_kernel; + m_parent.m_cf->m_context_arg = context_arg; + m_parent.m_cf->m_group_base_arg = group_base_arg; + m_parent.m_cf->m_private_base_arg = private_base_arg; + + if (ret_value != NULL_TREE && TREE_TYPE (ret_value) != void_type_node) + { + /* We cannot assign to <<retval>> directly in gcc trunk. We need to + create a local temporary variable which can be stored to and when + returning from the function, we'll copy it to the actual <<retval>> + in return statement's argument. */ + tree temp_var = m_parent.m_cf->m_ret_temp + = m_parent.m_cf->add_local_variable ("_retvalue_temp", + TREE_TYPE (ret_value)); + TREE_ADDRESSABLE (temp_var) = 1; + } + + if (is_kernel) + { + m_parent.m_cf->add_id_variables (); + + /* Create a single entry point in the function. */ + m_parent.m_cf->m_entry_label_stmt + = build_stmt (LABEL_EXPR, m_parent.m_cf->label ("__kernel_entry")); + m_parent.m_cf->append_statement (m_parent.m_cf->m_entry_label_stmt); + + tree bind_expr = m_parent.m_cf->m_current_bind_expr; + tree stmts = BIND_EXPR_BODY (bind_expr); + + m_parent.m_cf->m_kernel_entry = tsi_last (stmts); + + /* Let's not append the exit label yet, but only after the + function has been built. We need to build it so it can + be referred to because returns are converted to gotos to this + label. */ + m_parent.m_cf->m_exit_label = m_parent.m_cf->label ("__kernel_exit"); + } + + return bytes_consumed; +} diff --git a/gcc/brig/brigfrontend/brig-function.cc b/gcc/brig/brigfrontend/brig-function.cc new file mode 100644 index 00000000000..5f9784c2dee --- /dev/null +++ b/gcc/brig/brigfrontend/brig-function.cc @@ -0,0 +1,723 @@ +/* brig-function.cc -- declaration of brig_function class. + Copyright (C) 2016 Free Software Foundation, Inc. + Contributed by Pekka Jaaskelainen <pekka.jaaskelainen@parmance.com> + for General Processor Tech. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation; either version 3, or (at your option) any later + version. + + GCC is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. */ + +#include <sstream> +#include <iomanip> + +#include "brig-function.h" +#include "stringpool.h" +#include "tree-iterator.h" +#include "toplev.h" +#include "gimplify.h" +#include "gimple-expr.h" +#include "print-tree.h" +#include "hsa-brig-format.h" +#include "stor-layout.h" +#include "diagnostic-core.h" +#include "brig-code-entry-handler.h" +#include "brig-machine.h" +#include "brig-util.h" +#include "phsa.h" +#include "tree-pretty-print.h" +#include "dumpfile.h" +#include "tree-cfg.h" +#include "errors.h" +#include "function.h" +#include "brig-to-generic.h" +#include "brig-builtins.h" + +brig_function::brig_function (const BrigDirectiveExecutable *exec, + brig_to_generic *parent) + : m_brig_def (exec), m_is_kernel (false), m_is_finished (false), m_name (""), + m_current_bind_expr (NULL_TREE), m_func_decl (NULL_TREE), + m_context_arg (NULL_TREE), m_group_base_arg (NULL_TREE), + m_private_base_arg (NULL_TREE), m_ret_value (NULL_TREE), + m_next_kernarg_offset (0), m_kernarg_max_align (0), + m_ret_value_brig_var (NULL), m_has_barriers (false), + m_has_allocas (false), m_has_function_calls_with_barriers (false), + m_calls_analyzed (false), m_is_wg_function (false), + m_has_unexpanded_dp_builtins (false), m_generating_arg_block (false), + m_parent (parent) +{ + memset (m_regs, 0, + BRIG_2_TREE_HSAIL_TOTAL_REG_COUNT * sizeof (BrigOperandRegister *)); + memset (&m_descriptor, 0, sizeof (phsa_descriptor)); +} + +brig_function::~brig_function () +{ + for (size_t i = 0; i < BRIG_2_TREE_HSAIL_TOTAL_REG_COUNT; ++i) + { + if (m_regs[i] != NULL) + { + delete m_regs[i]; + m_regs[i] = NULL; + } + } +} + +/* Returns a GENERIC label with the given name in the given function. + Creates it, if not yet found. */ + +tree +brig_function::label (const std::string &name) +{ + label_index::const_iterator i = m_label_index.find (name); + if (i == m_label_index.end ()) + { + tree name_identifier + = get_identifier_with_length (name.c_str (), name.size ()); + + tree label_decl = build_decl (UNKNOWN_LOCATION, LABEL_DECL, + name_identifier, void_type_node); + + DECL_CONTEXT (label_decl) = m_func_decl; + DECL_ARTIFICIAL (label_decl) = 0; + + m_label_index[name] = label_decl; + return label_decl; + } + else + return (*i).second; +} + +/* Record an argument variable for later use. This includes both local + variables inside arg blocks and incoming function arguments. */ + +void +brig_function::add_arg_variable (const BrigDirectiveVariable *brigVar, + tree treeDecl) +{ + m_arg_variables[brigVar] = treeDecl; +} + +tree +brig_function::arg_variable (const BrigDirectiveVariable *var) const +{ + variable_index::const_iterator i = m_arg_variables.find (var); + if (i == m_arg_variables.end ()) + return NULL_TREE; + else + return (*i).second; +} + +/* Appends a new kernel argument descriptor for the current kernel's + arg space. */ + +void +brig_function::append_kernel_arg (const BrigDirectiveVariable *var, size_t size, + size_t alignment) +{ + gcc_assert (m_func_decl != NULL_TREE); + gcc_assert (m_is_kernel); + + size_t align_padding = m_next_kernarg_offset % alignment == 0 ? + 0 : (alignment - m_next_kernarg_offset % alignment); + m_next_kernarg_offset += align_padding; + m_kernarg_offsets[var] = m_next_kernarg_offset; + m_next_kernarg_offset += size; + + m_kernarg_max_align + = m_kernarg_max_align < alignment ? alignment : m_kernarg_max_align; +} + +size_t +brig_function::kernel_arg_offset (const BrigDirectiveVariable *var) const +{ + var_offset_table::const_iterator i = m_kernarg_offsets.find (var); + gcc_assert (i != m_kernarg_offsets.end ()); + return (*i).second; +} + +/* Add work-item ID variables to the beginning of the kernel function + which can be used for address computation as kernel dispatch packet + instructions can be expanded to GENERIC nodes referring to them. */ + +void +brig_function::add_id_variables () +{ + tree bind_expr = m_current_bind_expr; + tree stmts = BIND_EXPR_BODY (bind_expr); + + /* Initialize the WG limits and local ids. */ + + tree_stmt_iterator entry = tsi_start (stmts); + + for (int i = 0; i < 3; ++i) + { + char dim_char = (char) ((int) 'x' + i); + + /* The local sizes are limited to 16b values, but let's still use 32b + to avoid unnecessary casts (the ID functions are 32b). */ + m_local_id_vars[i] + = add_local_variable (std::string ("__local_") + dim_char, + uint32_type_node); + + tree workitemid_call + = call_builtin (builtin_decl_explicit (BUILT_IN_HSAIL_WORKITEMID), 2, + uint32_type_node, uint32_type_node, + build_int_cst (uint32_type_node, i), ptr_type_node, + m_context_arg); + + tree id_init = build2 (MODIFY_EXPR, TREE_TYPE (m_local_id_vars[i]), + m_local_id_vars[i], workitemid_call); + + tsi_link_after (&entry, id_init, TSI_NEW_STMT); + + m_cur_wg_size_vars[i] + = add_local_variable (std::string ("__cur_wg_size_") + dim_char, + uint32_type_node); + + tree cwgz_call + = call_builtin + (builtin_decl_explicit (BUILT_IN_HSAIL_CURRENTWORKGROUPSIZE), + 2, uint32_type_node, uint32_type_node, + build_int_cst (uint32_type_node, i), ptr_type_node, m_context_arg); + + tree limit_init = build2 (MODIFY_EXPR, TREE_TYPE (m_cur_wg_size_vars[i]), + m_cur_wg_size_vars[i], cwgz_call); + + tsi_link_after (&entry, limit_init, TSI_NEW_STMT); + + m_wg_id_vars[i] + = add_local_variable (std::string ("__workgroupid_") + dim_char, + uint32_type_node); + + tree wgid_call + = call_builtin (builtin_decl_explicit (BUILT_IN_HSAIL_WORKGROUPID), + 2, uint32_type_node, uint32_type_node, + build_int_cst (uint32_type_node, i), ptr_type_node, + m_context_arg); + + tree wgid_init = build2 (MODIFY_EXPR, TREE_TYPE (m_wg_id_vars[i]), + m_wg_id_vars[i], wgid_call); + + tsi_link_after (&entry, wgid_init, TSI_NEW_STMT); + + m_wg_size_vars[i] + = add_local_variable (std::string ("__workgroupsize_") + dim_char, + uint32_type_node); + + tree wgsize_call + = call_builtin (builtin_decl_explicit (BUILT_IN_HSAIL_WORKGROUPSIZE), + 2, uint32_type_node, uint32_type_node, + build_int_cst (uint32_type_node, i), ptr_type_node, + m_context_arg); + + tree wgsize_init = build2 (MODIFY_EXPR, TREE_TYPE (m_wg_size_vars[i]), + m_wg_size_vars[i], wgsize_call); + + tsi_link_after (&entry, wgsize_init, TSI_NEW_STMT); + + m_grid_size_vars[i] + = add_local_variable (std::string ("__gridsize_") + dim_char, + uint32_type_node); + + tree gridsize_call + = call_builtin (builtin_decl_explicit (BUILT_IN_HSAIL_GRIDSIZE), 2, + uint32_type_node, uint32_type_node, + build_int_cst (uint32_type_node, i), ptr_type_node, + m_context_arg); + + tree gridsize_init = build2 (MODIFY_EXPR, TREE_TYPE (m_grid_size_vars[i]), + m_grid_size_vars[i], gridsize_call); + + tsi_link_after (&entry, gridsize_init, TSI_NEW_STMT); + } + + m_kernel_entry = entry; +} + +/* Creates a new local variable with the given NAME and given GENERIC + TYPE. */ + +tree +brig_function::add_local_variable (std::string name, tree type) +{ + tree name_identifier + = get_identifier_with_length (name.c_str (), name.size ()); + tree variable + = build_decl (UNKNOWN_LOCATION, VAR_DECL, name_identifier, type); + + DECL_NONLOCAL (variable) = 0; + TREE_ADDRESSABLE (variable) = 0; + TREE_STATIC (variable) = 0; + TREE_USED (variable) = 1; + DECL_ARTIFICIAL (variable) = 0; + + tree bind_expr = DECL_SAVED_TREE (m_func_decl); + + DECL_CONTEXT (variable) = m_func_decl; + + DECL_CHAIN (variable) = BIND_EXPR_VARS (bind_expr); + BIND_EXPR_VARS (bind_expr) = variable; + return variable; +} + +/* Returns a DECL_VAR for the given HSAIL operand register. + If it has not been created yet for the function being generated, + creates it as an unsigned int variable. */ + +tree +brig_function::get_m_var_declfor_reg (const BrigOperandRegister *reg) +{ + size_t offset = reg->regNum; + switch (reg->regKind) + { + case BRIG_REGISTER_KIND_QUAD: + offset + += BRIG_2_TREE_HSAIL_D_REG_COUNT + BRIG_2_TREE_HSAIL_S_REG_COUNT + + BRIG_2_TREE_HSAIL_C_REG_COUNT; + break; + case BRIG_REGISTER_KIND_DOUBLE: + offset += BRIG_2_TREE_HSAIL_S_REG_COUNT + BRIG_2_TREE_HSAIL_C_REG_COUNT; + break; + case BRIG_REGISTER_KIND_SINGLE: + offset += BRIG_2_TREE_HSAIL_C_REG_COUNT; + case BRIG_REGISTER_KIND_CONTROL: + break; + default: + gcc_unreachable (); + break; + } + + reg_decl_index_entry *regEntry = m_regs[offset]; + if (regEntry == NULL) + { + size_t reg_size = gccbrig_reg_size (reg); + tree type; + if (reg_size > 1) + type = build_nonstandard_integer_type (reg_size, true); + else + type = boolean_type_node; + + /* Drop the const qualifier so we do not end up with a read only + register variable which cannot be written to later. */ + tree nonconst_type = build_type_variant (type, false, false); + + regEntry = new reg_decl_index_entry; + + regEntry->m_var_decl + = add_local_variable (gccbrig_reg_name (reg), nonconst_type); + m_regs[offset] = regEntry; + } + return regEntry->m_var_decl; +} + +/* Builds a work-item do..while loop for a single DIM. HEADER_ENTRY is + a statement after which the iteration variables should be initialized and + the loop body starts. BRANCH_AFTER is the statement after which the loop + predicate check and the back edge goto will be appended. */ + +void +brig_function::add_wi_loop (int dim, tree_stmt_iterator *header_entry, + tree_stmt_iterator *branch_after) +{ + tree ivar = m_local_id_vars[dim]; + tree ivar_max = m_cur_wg_size_vars[dim]; + tree_stmt_iterator entry = *header_entry; + + /* TODO: this is not a parallel loop as we share the "register variables" + across work-items. Should create a copy of them per WI instance. That + is, declare temporaries for new definitions inside the loop body, not at + function scope. */ + + tree ivar_init = build2 (MODIFY_EXPR, TREE_TYPE (ivar), ivar, + build_zero_cst (TREE_TYPE (ivar))); + tsi_link_after (&entry, ivar_init, TSI_NEW_STMT); + + tree loop_body_label + = label (std::string ("__wi_loop_") + (char) ((int) 'x' + dim)); + tree loop_body_label_stmt = build_stmt (LABEL_EXPR, loop_body_label); + + tsi_link_after (&entry, loop_body_label_stmt, TSI_NEW_STMT); + + if (m_has_unexpanded_dp_builtins) + { + tree id_set_builtin + = builtin_decl_explicit (BUILT_IN_HSAIL_SETWORKITEMID); + /* Set the local ID to the current wi-loop iteration variable value to + ensure the builtins see the correct values. */ + tree id_set_call + = call_builtin (id_set_builtin, 3, + void_type_node, uint32_type_node, + build_int_cst (uint32_type_node, dim), uint32_type_node, + ivar, ptr_type_node, m_context_arg); + tsi_link_after (&entry, id_set_call, TSI_NEW_STMT); + } + + /* Increment the WI iteration variable. */ + tree incr = build2 (PREINCREMENT_EXPR, TREE_TYPE (ivar), ivar, + build_one_cst (TREE_TYPE (ivar))); + + tsi_link_after (branch_after, incr, TSI_NEW_STMT); + + /* Append the predicate check with the back edge goto. */ + tree condition = build2 (LT_EXPR, TREE_TYPE (ivar), ivar, ivar_max); + tree target_goto = build1 (GOTO_EXPR, void_type_node, loop_body_label); + tree if_stmt + = build3 (COND_EXPR, void_type_node, condition, target_goto, NULL_TREE); + tsi_link_after (branch_after, if_stmt, TSI_NEW_STMT); +} + +/* Recursively analyzes the function and its callees for barrier usage. */ + +void +brig_function::analyze_calls () +{ + if (m_calls_analyzed) + return; + + /* Set this early to not get stuck in case of recursive call graphs. + This is safe because if the function calls itself, either the function + has barrier calls which implies a call to a function with barrier calls, + or it doesn't in which case the result depends on the later called + functions. */ + m_calls_analyzed = true; + + for (size_t i = 0; i < m_called_functions.size (); ++i) + { + tree f = m_called_functions[i]; + brig_function *called_f = m_parent->get_finished_function (f); + if (called_f == NULL) + { + /* Unfinished function (only declaration within the set of BRIGs) + found. Cannot finish the CG analysis. Have to assume it does have + a barrier for safety. */ + m_has_function_calls_with_barriers = true; + m_has_unexpanded_dp_builtins = true; + break; + } + called_f->analyze_calls (); + /* We can assume m_has_barriers has been correctly set during the + construction of the function decl. No need to reanalyze it. */ + m_has_function_calls_with_barriers |= called_f->m_has_barriers; + + /* If the function or any of its called functions has dispatch + packet builtin calls that require the local id, we need to + set the local id to the context in the work item loop before + the functions are called. If we analyze the opposite, these + function calls can be omitted. */ + m_has_unexpanded_dp_builtins |= called_f->m_has_unexpanded_dp_builtins; + } +} + +/* Tries to convert the current kernel to a work-group function that executes + all work-items using loops. Returns true in case the conversion was + successful. */ + +bool +brig_function::convert_to_wg_function () +{ + if (!m_calls_analyzed) + analyze_calls (); + + if (m_has_barriers || m_has_function_calls_with_barriers) + return false; + + /* The most trivial case: No barriers at all in the kernel. + We can create one big work-item loop around the whole kernel. */ + tree bind_expr = m_current_bind_expr; + tree stmts = BIND_EXPR_BODY (bind_expr); + + for (int i = 0; i < 3; ++i) + { + /* The previous loop has added a new label to the end of the function, + the next level loop should wrap around it also. */ + tree_stmt_iterator function_exit = tsi_last (stmts); + add_wi_loop (i, &m_kernel_entry, &function_exit); + } + + m_is_wg_function = true; + return false; +} + +/* Emits a kernel description to a special ELF section so it can be + utilized by an HSA runtime implementation. The assembly block + must be emitted to a statement list of an function, which is given + as an argument. Returns the assembly block used to emit the section. */ + +tree +brig_function::emit_metadata (tree stmt_list) +{ + /* Emit an ELF section via an assembly directive that generates a special + ELF section for each kernel that contains raw bytes of a descriptor + object. This is pretty disgusting, but life is never perfect ;) */ + + /* Use the original kernel name without the '_' prefix in the section name. */ + std::string kern_name = m_is_kernel ? m_name.substr (1) : m_name; + + std::ostringstream strstr; + strstr << std::endl + << ".pushsection " << PHSA_DESC_SECTION_PREFIX << kern_name + << std::endl + << "\t.p2align 1, 1, 1" << std::endl + << "\t.byte "; + + for (size_t i = 0; i < sizeof (phsa_descriptor); ++i) + { + strstr << "0x" << std::setw (2) << std::setfill ('0') << std::hex + << (unsigned) *((unsigned char *) &m_descriptor + i); + if (i + 1 < sizeof (phsa_descriptor)) + strstr << ", "; + } + + strstr << std::endl << ".popsection" << std::endl << std::endl; + + tree metadata_asm + = build_stmt (ASM_EXPR, + build_string (strstr.str ().size (), strstr.str ().c_str ()), + NULL_TREE, NULL_TREE, NULL_TREE, NULL_TREE); + + append_to_statement_list_force (metadata_asm, &stmt_list); + return metadata_asm; +} + +/* Emits the kernel launcher function. Also emits the metadata section + creation statements in it. + + The launcher function calls the device-side runtime + that runs the kernel for all work-items. In C: + + void KernelName (void* context, void* group_base_addr) + { + __hsail_launch_kernel (_KernelName, context, group_base_addr); + } + + or, in case of a successful conversion to a work-group function: + + void KernelName (void* context, void* group_base_addr) + { + __hsail_launch_wg_function (_KernelName, context, group_base_addr); + } + + The user/host sees this function as the kernel to call from the + outside. The actual kernel generated from HSAIL was named _KernelName. +*/ + +tree +brig_function::emit_launcher_and_metadata () +{ + /* The original kernel name without the '_' prefix. */ + std::string kern_name = m_name.substr (1); + + tree name_identifier + = get_identifier_with_length (kern_name.c_str (), kern_name.size ()); + + tree launcher + = build_decl (UNKNOWN_LOCATION, FUNCTION_DECL, name_identifier, + build_function_type_list (void_type_node, ptr_type_node, + ptr_type_node, NULL_TREE)); + + TREE_USED (launcher) = 1; + DECL_ARTIFICIAL (launcher) = 1; + + tree context_arg = build_decl (UNKNOWN_LOCATION, PARM_DECL, + get_identifier ("__context"), ptr_type_node); + + DECL_ARGUMENTS (launcher) = context_arg; + DECL_ARG_TYPE (context_arg) = ptr_type_node; + DECL_CONTEXT (context_arg) = launcher; + TREE_USED (context_arg) = 1; + DECL_ARTIFICIAL (context_arg) = 1; + + tree group_base_addr_arg + = build_decl (UNKNOWN_LOCATION, PARM_DECL, + get_identifier ("__group_base_addr"), ptr_type_node); + + chainon (DECL_ARGUMENTS (launcher), group_base_addr_arg); + DECL_ARG_TYPE (group_base_addr_arg) = ptr_type_node; + DECL_CONTEXT (group_base_addr_arg) = launcher; + TREE_USED (group_base_addr_arg) = 1; + DECL_ARTIFICIAL (group_base_addr_arg) = 1; + + tree resdecl + = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, void_type_node); + + DECL_RESULT (launcher) = resdecl; + DECL_CONTEXT (resdecl) = launcher; + + DECL_INITIAL (launcher) = make_node (BLOCK); + TREE_USED (DECL_INITIAL (launcher)) = 1; + + tree stmt_list = alloc_stmt_list (); + + tree bind_expr = build3 (BIND_EXPR, void_type_node, NULL, stmt_list, NULL); + + TREE_STATIC (launcher) = 0; + TREE_PUBLIC (launcher) = 1; + + DECL_SAVED_TREE (launcher) = bind_expr; + + if (DECL_STRUCT_FUNCTION (launcher) == NULL) + push_struct_function (launcher); + else + push_cfun (DECL_STRUCT_FUNCTION (launcher)); + + tree kernel_func_ptr = build1 (ADDR_EXPR, ptr_type_node, m_func_decl); + + tree phsail_launch_kernel_call; + + /* Emit a launcher depending whether we converted the kernel function to + a work group function or not. */ + if (m_is_wg_function) + phsail_launch_kernel_call + = call_builtin (builtin_decl_explicit (BUILT_IN_HSAIL_LAUNCH_WG_FUNC), + 3, void_type_node, + ptr_type_node, kernel_func_ptr, ptr_type_node, + context_arg, ptr_type_node, group_base_addr_arg); + else + phsail_launch_kernel_call + = call_builtin (builtin_decl_explicit (BUILT_IN_HSAIL_LAUNCH_KERNEL), + 3, void_type_node, + ptr_type_node, kernel_func_ptr, ptr_type_node, + context_arg, ptr_type_node, group_base_addr_arg); + + append_to_statement_list_force (phsail_launch_kernel_call, &stmt_list); + + emit_metadata (stmt_list); + + return launcher; +} + +tree +brig_function::append_statement (tree stmt) +{ + gcc_assert (m_func_decl != NULL); + + tree bind_expr = m_current_bind_expr; + tree stmts = BIND_EXPR_BODY (bind_expr); + + append_to_statement_list_force (stmt, &stmts); + return stmt; +} + +/* Creates a new "alloca frame" for the current function by + injecting an alloca frame push in the beginning of the function + and an alloca frame pop before all function exit points. */ + +void +brig_function::create_alloca_frame () +{ + tree_stmt_iterator entry; + + /* Adds the alloca push only after the ids have been initialized + in case of a kernel function. */ + if (m_is_kernel) + entry = m_kernel_entry; + else + { + tree bind_expr = m_current_bind_expr; + tree stmts = BIND_EXPR_BODY (bind_expr); + entry = tsi_start (stmts); + } + + tree push_frame_builtin = builtin_decl_explicit (BUILT_IN_HSAIL_PUSH_FRAME); + tree push_frame_call + = call_builtin (push_frame_builtin, 1, void_type_node, ptr_type_node, + m_context_arg); + + tsi_link_before (&entry, push_frame_call, TSI_NEW_STMT); + + tree pop_frame_builtin = builtin_decl_explicit (BUILT_IN_HSAIL_POP_FRAME); + + do + { + tree stmt = tsi_stmt (entry); + if (TREE_CODE (stmt) == RETURN_EXPR) + { + tree pop_frame_call + = call_builtin (pop_frame_builtin, 1, void_type_node, + ptr_type_node, m_context_arg); + + tsi_link_before (&entry, pop_frame_call, TSI_SAME_STMT); + } + tsi_next (&entry); + } + while (!tsi_end_p (entry)); +} + +/* Finishes the currently built function. After calling this, no new + statements should be appeneded to the function. */ +void +brig_function::finish () +{ + append_return_stmt (); + + /* Currently assume single alloca frame per WG. */ + if (m_has_allocas) + create_alloca_frame (); +} + +void +brig_function::finish_kernel () +{ + /* Kernel functions should have a single exit point. + Let's create one. The return instructions should have + been converted to branches to this label. */ + append_statement (build_stmt (LABEL_EXPR, m_exit_label)); + /* Attempt to convert the kernel to a work-group function that + executes all work-items of the WG using a loop. */ + convert_to_wg_function (); + + append_return_stmt (); + + /* Currently assume single alloca frame per WG. */ + if (m_has_allocas) + create_alloca_frame (); +} + +void +brig_function::append_return_stmt () +{ + gcc_assert (m_current_bind_expr != NULL_TREE); + tree stmts = BIND_EXPR_BODY (m_current_bind_expr); + + if (STATEMENT_LIST_TAIL (stmts) == NULL) + return; /* Empty function. */ + + tree last_stmt = tsi_stmt (tsi_last (stmts)); + + if (TREE_CODE (last_stmt) == RETURN_EXPR) + return; + + if (m_ret_value != NULL_TREE) + { + tree result_assign + = build2 (MODIFY_EXPR, TREE_TYPE (m_ret_value), m_ret_value, + m_ret_temp); + + tree return_expr + = build1 (RETURN_EXPR, TREE_TYPE (result_assign), result_assign); + append_to_statement_list_force (return_expr, &stmts); + } + else + { + tree return_stmt = build_stmt (RETURN_EXPR, NULL); + append_to_statement_list_force (return_stmt, &stmts); + } +} + +bool +brig_function::has_function_scope_var (const BrigBase* var) const +{ + return m_function_scope_vars.find (var) != m_function_scope_vars.end (); +} diff --git a/gcc/brig/brigfrontend/brig-function.h b/gcc/brig/brigfrontend/brig-function.h new file mode 100644 index 00000000000..81c3f89ecde --- /dev/null +++ b/gcc/brig/brigfrontend/brig-function.h @@ -0,0 +1,213 @@ +/* brig-function.h -- declaration of brig_function class. + Copyright (C) 2016 Free Software Foundation, Inc. + Contributed by Pekka Jaaskelainen <pekka.jaaskelainen@parmance.com> + for General Processor Tech. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation; either version 3, or (at your option) any later + version. + + GCC is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. */ + +#ifndef BRIG_FUNCTION_H +#define BRIG_FUNCTION_H + +#include "config.h" +#include "system.h" +#include "ansidecl.h" +#include "coretypes.h" +#include "opts.h" +#include "tree.h" +#include "tree-iterator.h" +#include "hsa-brig-format.h" + +class brig_to_generic; + +#include <map> +#include <string> +#include <vector> +#include <set> + +#include "phsa.h" + +typedef std::map<std::string, tree> label_index; +typedef std::map<const BrigDirectiveVariable *, tree> variable_index; +typedef std::vector<tree> tree_stl_vec; + +/* There are 128 c regs and 2048 s/d/q regs each in the HSAIL. */ +#define BRIG_2_TREE_HSAIL_C_REG_COUNT (128) +#define BRIG_2_TREE_HSAIL_S_REG_COUNT (2048) +#define BRIG_2_TREE_HSAIL_D_REG_COUNT (2048) +#define BRIG_2_TREE_HSAIL_Q_REG_COUNT (2048) +#define BRIG_2_TREE_HSAIL_TOTAL_REG_COUNT \ + (BRIG_2_TREE_HSAIL_C_REG_COUNT + BRIG_2_TREE_HSAIL_S_REG_COUNT \ + + BRIG_2_TREE_HSAIL_D_REG_COUNT + BRIG_2_TREE_HSAIL_Q_REG_COUNT) + +/* Holds data for the currently built GENERIC function. */ + +class brig_function +{ +public: + typedef std::map<const BrigDirectiveVariable *, size_t> var_offset_table; + +private: + struct reg_decl_index_entry + { + tree m_var_decl; + }; + +public: + brig_function (const BrigDirectiveExecutable *exec, brig_to_generic *parent); + ~brig_function (); + + tree arg_variable (const BrigDirectiveVariable *var) const; + void add_arg_variable (const BrigDirectiveVariable *brigVar, tree treeDecl); + + void append_kernel_arg (const BrigDirectiveVariable *var, size_t size, + size_t alignment); + + size_t kernel_arg_offset (const BrigDirectiveVariable *var) const; + + void add_id_variables (); + + tree label (const std::string &name); + + tree add_local_variable (std::string name, tree type); + + tree get_m_var_declfor_reg (const BrigOperandRegister *reg); + + bool convert_to_wg_function (); + + void add_wi_loop (int dim, tree_stmt_iterator *header_entry, + tree_stmt_iterator *branch_after); + + tree emit_metadata (tree stmt_list); + tree emit_launcher_and_metadata (); + + tree append_statement (tree stmt); + + void create_alloca_frame (); + + void finish (); + void finish_kernel (); + + void append_return_stmt (); + + bool has_function_scope_var (const BrigBase* var) const; + + void analyze_calls (); + + const BrigDirectiveExecutable *m_brig_def; + + bool m_is_kernel; + bool m_is_finished; + std::string m_name; + tree m_current_bind_expr; + tree m_func_decl; + tree m_entry_label_stmt; + tree m_exit_label; + + /* The __context function argument. */ + tree m_context_arg; + /* The __group_base_ptr argument in the current function. + Points to the start of the group segment for the kernel + instance. */ + tree m_group_base_arg; + /* The __private_base_ptr argument in the current function. + Points to the start of the private segment. */ + tree m_private_base_arg; + + /* The return value variable for the current function. */ + tree m_ret_value; + + /* The offsets of the kernel arguments in the __arg blob + pointing to the kernel argument space. */ + size_t m_next_kernarg_offset; + + /* The largest kernel argument variable alignment. */ + size_t m_kernarg_max_align; + + var_offset_table m_kernarg_offsets; + + /* Argument variables in the currently handled binding expression + (argument segment). */ + variable_index m_arg_variables; + + /* The brig variable for the function return value. */ + const BrigDirectiveVariable *m_ret_value_brig_var; + + /* The function local temporary variable for the return value. */ + tree m_ret_temp; + + /* Labels in the current function are collected here so we can refer + to them from jumps before they have been placed to the function. */ + label_index m_label_index; + + /* If the kernel contains at least one barrier, this is set to true. */ + bool m_has_barriers; + + /* True if the function has at least one alloca instruction. */ + bool m_has_allocas; + + /* If the kernel containts at least one function call that _may_ + contain a barrier call, this is set to true. */ + bool m_has_function_calls_with_barriers; + + /* Set to true after this function has been analyzed for barrier and + dispatch packet instruction usage in the final call graph analysis. */ + bool m_calls_analyzed; + + /* True in case the function was successfully converted to a WG function. */ + bool m_is_wg_function; + + /* Work-item ID related variables are cached in the entry of the kernel + function in order to use them directly in address computations, leading + to more efficient optimizations. The references to the local variables + are stored here. */ + tree m_local_id_vars[3]; + tree m_cur_wg_size_vars[3]; + tree m_wg_id_vars[3]; + tree m_wg_size_vars[3]; + tree m_grid_size_vars[3]; + + /* Set to true in case the kernel contains at least one dispatch packet + (work-item ID-related) builtin call that could not be expanded to + tree nodes. */ + bool m_has_unexpanded_dp_builtins; + + /* Points to the instruction after which the real kernel code starts. + Usually points to the last WI ID variable initialization statement. */ + tree_stmt_iterator m_kernel_entry; + + /* True if we are currently generating the contents of an arg block. */ + bool m_generating_arg_block; + + /* A collection of function scope variables seen so far for resolving + variable references vs. module scope declarations. */ + std::set<const BrigBase*> m_function_scope_vars; + + /* The functions called by this function. */ + std::vector<tree> m_called_functions; + + brig_to_generic *m_parent; + /* The metadata of the function that should be stored with the binary and + passed to the HSA runtime: */ + phsa_descriptor m_descriptor; + +private: + /* Bookkeeping for the different HSA registers and their tree declarations + for the currently generated function. */ + reg_decl_index_entry *m_regs[BRIG_2_TREE_HSAIL_TOTAL_REG_COUNT]; +}; + +#endif diff --git a/gcc/brig/brigfrontend/brig-inst-mod-handler.cc b/gcc/brig/brigfrontend/brig-inst-mod-handler.cc new file mode 100644 index 00000000000..8cd55035b49 --- /dev/null +++ b/gcc/brig/brigfrontend/brig-inst-mod-handler.cc @@ -0,0 +1,58 @@ +/* brig-inst-mod-handler.cc -- brig rounding moded instruction handling + Copyright (C) 2016 Free Software Foundation, Inc. + Contributed by Pekka Jaaskelainen <pekka.jaaskelainen@parmance.com> + for General Processor Tech. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +#include "brig-code-entry-handler.h" + +#include "gimple-expr.h" +#include "errors.h" + +size_t +brig_inst_mod_handler::generate (const BrigBase *base) +{ + brig_basic_inst_handler basic_handler (m_parent); + return basic_handler (base); +} + +const BrigAluModifier8_t * +brig_inst_mod_handler::modifier (const BrigBase *base) const +{ + const BrigInstMod *inst = (const BrigInstMod *) base; + return &inst->modifier; +} + +const BrigRound8_t * +brig_inst_mod_handler::round (const BrigBase *base) const +{ + const BrigInstMod *inst = (const BrigInstMod *) base; + return &inst->round; +} + +/* This used to inject fesetround () calls to control the rounding mode of the + actual executed floating point operation. It turned out that supporting + conversions using fesetround calls won't work in gcc due to it not being + able to restrict code motions across calls at the moment. This + functionality is therefore disabled for now until a better solution is + found or if fesetround () is fixed in gcc. */ +size_t +brig_inst_mod_handler::operator () (const BrigBase *base) +{ + return generate (base); +} diff --git a/gcc/brig/brigfrontend/brig-label-handler.cc b/gcc/brig/brigfrontend/brig-label-handler.cc new file mode 100644 index 00000000000..890cf5b25d6 --- /dev/null +++ b/gcc/brig/brigfrontend/brig-label-handler.cc @@ -0,0 +1,37 @@ +/* brig-label-handler.cc -- brig label directive handling + Copyright (C) 2016 Free Software Foundation, Inc. + Contributed by Pekka Jaaskelainen <pekka.jaaskelainen@parmance.com> + for General Processor Tech. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +#include "brig-code-entry-handler.h" + +size_t +brig_directive_label_handler::operator () (const BrigBase *base) +{ + const BrigDirectiveLabel *brig_label = (const BrigDirectiveLabel *) base; + + const BrigData *label_name = m_parent.get_brig_data_entry (brig_label->name); + + std::string label_str ((const char *) (label_name->bytes), + label_name->byteCount); + + tree stmt = build_stmt (LABEL_EXPR, m_parent.m_cf->label (label_str)); + m_parent.m_cf->append_statement (stmt); + return base->byteCount; +} diff --git a/gcc/brig/brigfrontend/brig-lane-inst-handler.cc b/gcc/brig/brigfrontend/brig-lane-inst-handler.cc new file mode 100644 index 00000000000..5cfe8d96716 --- /dev/null +++ b/gcc/brig/brigfrontend/brig-lane-inst-handler.cc @@ -0,0 +1,84 @@ +/* brig-lane-inst-handler.cc -- brig lane instruction handling + Copyright (C) 2016 Free Software Foundation, Inc. + Contributed by Pekka Jaaskelainen <pekka.jaaskelainen@parmance.com> + for General Processor Tech. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +#include "brig-code-entry-handler.h" +#include "errors.h" +#include "diagnostic-core.h" +#include "brig-util.h" + +brig_lane_inst_handler::brig_lane_inst_handler (brig_to_generic &parent) + : brig_code_entry_handler (parent) +{ +} + +size_t +brig_lane_inst_handler::operator () (const BrigBase *base) +{ + const BrigInstLane &inst = *(const BrigInstLane *) base; + tree_stl_vec operands = build_operands (inst.base); + + tree expr = NULL_TREE; + if (inst.base.opcode == BRIG_OPCODE_ACTIVELANECOUNT) + { + /* Because we are fixed to single WI per wave, it's enough to + just check the src value of the single work item itself. */ + expr = build2 (NE_EXPR, uint32_type_node, + build_zero_cst (uint32_type_node), operands[1]); + } + else if (inst.base.opcode == BRIG_OPCODE_ACTIVELANEID) + { + expr = build_zero_cst (uint32_type_node); + } + else if (inst.base.opcode == BRIG_OPCODE_ACTIVELANEMASK) + { + tree u64_type = gccbrig_tree_type_for_hsa_type (BRIG_TYPE_U64); + tree zero_cst = build_zero_cst (u64_type); + expr = build2 (NE_EXPR, u64_type, zero_cst, operands[1]); + + tree_stl_vec elements; + elements.push_back (expr); + elements.push_back (zero_cst); + elements.push_back (zero_cst); + elements.push_back (zero_cst); + + expr = pack (elements); + } + else if (inst.base.opcode == BRIG_OPCODE_ACTIVELANEPERMUTE) + { + tree src = operands[1]; + tree identity = operands[3]; + tree use_identity = operands[4]; + + /* When WAVESIZE is 1, we either select the src of the work-item + itself or 'identity' in case use_identity is 1. */ + + tree cmp = build2 (EQ_EXPR, uint32_type_node, + build_int_cstu (uint32_type_node, 1), use_identity); + + expr = build3 (COND_EXPR, TREE_TYPE (src), cmp, identity, src); + } + else + gcc_unreachable (); + + build_output_assignment (inst.base, operands[0], expr); + + return base->byteCount; +} diff --git a/gcc/brig/brigfrontend/brig-machine.c b/gcc/brig/brigfrontend/brig-machine.c new file mode 100644 index 00000000000..62f07f7a8a6 --- /dev/null +++ b/gcc/brig/brigfrontend/brig-machine.c @@ -0,0 +1,44 @@ +/* brig-machine.c -- gccbrig machine queries + Copyright (C) 2016 Free Software Foundation, Inc. + Contributed by Pekka Jaaskelainen <pekka.jaaskelainen@parmance.com> + for General Processor Tech. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "brig-machine.h" + +/* Return the numerical address space id for the segment in the current + target. Currently a dummy function that always returns 0, serves as + a placeholder for multi-AS machines. */ + +unsigned +gccbrig_get_target_addr_space_id (BrigSegment8_t) +{ + return 0; +} + +/* Return the WAVESIZE for the current target. For now a dummy placeholder + returning always 1. */ + +unsigned +gccbrig_get_target_wavesize () +{ + return 1; +} diff --git a/gcc/brig/brigfrontend/brig-machine.h b/gcc/brig/brigfrontend/brig-machine.h new file mode 100644 index 00000000000..96efbf6153e --- /dev/null +++ b/gcc/brig/brigfrontend/brig-machine.h @@ -0,0 +1,33 @@ +/* brig-machine.h -- gccbrig machine queries + Copyright (C) 2016 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +#ifndef GCC_BRIG_MACHINE_H +#define GCC_BRIG_MACHINE_H + +#include "hsa-brig-format.h" + +/* These functions should be eventually converted to machine info queries and + redefined at backends. At that point make these functions delegate to + those. */ + +unsigned gccbrig_get_target_addr_space_id (BrigSegment8_t segment); + +unsigned gccbrig_get_target_wavesize (); + +#endif diff --git a/gcc/brig/brigfrontend/brig-mem-inst-handler.cc b/gcc/brig/brigfrontend/brig-mem-inst-handler.cc new file mode 100644 index 00000000000..dfd336933be --- /dev/null +++ b/gcc/brig/brigfrontend/brig-mem-inst-handler.cc @@ -0,0 +1,180 @@ +/* brig-mem-inst-handler.cc -- brig memory inst handler + Copyright (C) 2016 Free Software Foundation, Inc. + Contributed by Pekka Jaaskelainen <pekka.jaaskelainen@parmance.com> + for General Processor Tech. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation; either version 3, or (at your option) any later + version. + + GCC is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. */ + +#include "brig-code-entry-handler.h" + +#include "errors.h" +#include "brig-util.h" +#include "gimple-expr.h" +#include "print-tree.h" +#include "tree-pretty-print.h" +#include "convert.h" +#include "diagnostic-core.h" + +tree +brig_mem_inst_handler::build_mem_access (const BrigInstBase *brig_inst, + tree addr, tree data) +{ + bool is_load = brig_inst->opcode == BRIG_OPCODE_LD; + bool is_store = brig_inst->opcode == BRIG_OPCODE_ST; + + if (!is_load && !is_store) + gcc_unreachable (); + + tree instr_type = gccbrig_tree_type_for_hsa_type (brig_inst->type); + + if (VECTOR_TYPE_P (TREE_TYPE (data))) + instr_type = TREE_TYPE (data); + + tree ptype = build_pointer_type (instr_type); + + /* The HSAIL mem instructions are unaligned by default. + TODO: exploit the align modifier, it should lead to faster code. + */ + tree unaligned_type = build_aligned_type (instr_type, 8); + + /* Create a mem ref from the previous result, without offset. */ + tree mem_ref + = build2 (MEM_REF, unaligned_type, addr, build_int_cst (ptype, 0)); + + if (is_load) + { + /* Add a temporary variable so there won't be multiple + reads in case of vector unpack. */ + mem_ref = add_temp_var ("mem_read", mem_ref); + return build_output_assignment (*brig_inst, data, mem_ref); + } + else + { + tree stmt = build2 (MODIFY_EXPR, TREE_TYPE (mem_ref), mem_ref, data); + return m_parent.m_cf->append_statement (stmt); + } + return mem_ref; +} + +size_t +brig_mem_inst_handler::operator () (const BrigBase *base) +{ + const BrigInstBase *brig_inst + = (const BrigInstBase *) &((const BrigInstBasic *) base)->base; + + if (brig_inst->opcode == BRIG_OPCODE_ALLOCA) + { + tree_stl_vec operands = build_operands (*brig_inst); + size_t alignment = 1; + const BrigInstMem *mem_inst = (const BrigInstMem *) brig_inst; + if (mem_inst->align != BRIG_ALIGNMENT_NONE) + { + alignment = 1 << (mem_inst->align - 1); + } + + tree align_opr = build_int_cstu (size_type_node, alignment); + tree_stl_vec inputs; + inputs.push_back (operands[1]); + inputs.push_back (align_opr); + tree builtin_call + = expand_or_call_builtin (BRIG_OPCODE_ALLOCA, BRIG_TYPE_U32, + uint32_type_node, inputs); + build_output_assignment (*brig_inst, operands[0], builtin_call); + m_parent.m_cf->m_has_allocas = true; + return base->byteCount; + } + + tree instr_type = gccbrig_tree_type_for_hsa_type (brig_inst->type); + + const BrigData *operand_entries + = m_parent.get_brig_data_entry (brig_inst->operands); + + uint32_t data_operand_offset; + memcpy (&data_operand_offset, &operand_entries->bytes, 4); + + const BrigBase *operand + = m_parent.get_brig_operand_entry (data_operand_offset); + + const BrigData *operandData = NULL; + + bool is_store = brig_inst->opcode == BRIG_OPCODE_ST; + + bool is_three_element_vector_access + = operand->kind == BRIG_KIND_OPERAND_OPERAND_LIST + && (operandData = m_parent.get_brig_data_entry + (((const BrigOperandOperandList *) operand)->elements)) + && operandData->byteCount / 4 == 3; + + if (is_three_element_vector_access) + { + /* We need to scalarize the 3-element vector accesses here + because gcc assumes the GENERIC vector datatypes are of two exponent + size internally. */ + size_t bytes = operandData->byteCount; + const BrigOperandOffset32_t *operand_ptr + = (const BrigOperandOffset32_t *) operandData->bytes; + + uint32_t addr_operand_offset; + memcpy (&addr_operand_offset, &operand_entries->bytes + 4, 4); + + const BrigOperandAddress *addr_operand + = (const BrigOperandAddress *) m_parent.get_brig_operand_entry + (addr_operand_offset); + + tree address_base = build_address_operand (*brig_inst, *addr_operand); + + uint32_t address_offset = 0; + while (bytes > 0) + { + BrigOperandOffset32_t offset = *operand_ptr; + const BrigBase *operand_element + = m_parent.get_brig_operand_entry (offset); + tree data + = build_tree_operand (*brig_inst, *operand_element, instr_type); + + tree ptr_offset = build_int_cst (size_type_node, address_offset); + tree address = build2 (POINTER_PLUS_EXPR, TREE_TYPE (address_base), + address_base, ptr_offset); + + if (is_store && TREE_TYPE (data) != instr_type) + { + if (int_size_in_bytes (TREE_TYPE (data)) + == int_size_in_bytes (instr_type) + && !INTEGRAL_TYPE_P (instr_type)) + data = build1 (VIEW_CONVERT_EXPR, instr_type, data); + else + data = convert (instr_type, data); + } + + build_mem_access (brig_inst, address, data); + + address_offset += int_size_in_bytes (instr_type); + ++operand_ptr; + bytes -= 4; + } + } + else + { + tree_stl_vec operands = build_operands (*brig_inst); + + tree &data = operands.at (0); + tree &addr = operands.at (1); + build_mem_access (brig_inst, addr, data); + } + + return base->byteCount; +} diff --git a/gcc/brig/brigfrontend/brig-module-handler.cc b/gcc/brig/brigfrontend/brig-module-handler.cc new file mode 100644 index 00000000000..2c25189e0c6 --- /dev/null +++ b/gcc/brig/brigfrontend/brig-module-handler.cc @@ -0,0 +1,41 @@ +/* brig-module-handler.cc -- brig module directive handling + Copyright (C) 2016 Free Software Foundation, Inc. + Contributed by Pekka Jaaskelainen <pekka.jaaskelainen@parmance.com> + for General Processor Tech. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation; either version 3, or (at your option) any later + version. + + GCC is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. */ + +#include "brig-code-entry-handler.h" +#include "diagnostic-core.h" + +size_t +brig_directive_module_handler::operator () (const BrigBase *base) +{ + const BrigDirectiveModule* mod = (const BrigDirectiveModule*)base; + m_parent.m_module_name = m_parent.get_string (mod->name).substr (1); + if (mod->hsailMajor != 1 || mod->hsailMinor != 0) + fatal_error (UNKNOWN_LOCATION, PHSA_ERROR_PREFIX_INCOMPATIBLE_MODULE " " + "HSAIL version not supported. HSAIL 1.0 required."); + if (mod->machineModel != BRIG_MACHINE_LARGE) + fatal_error (UNKNOWN_LOCATION, PHSA_ERROR_PREFIX_INCOMPATIBLE_MODULE " " + "Only HSA 'large' machine model supported."); + /* Do not check for the profile as the runtime conformance suite tests + with 'full' profile BRIGs even though they don't use any full profile + features. This allows us to run the conformance suite with the + BRIG FE. */ + return base->byteCount; +} diff --git a/gcc/brig/brigfrontend/brig-queue-inst-handler.cc b/gcc/brig/brigfrontend/brig-queue-inst-handler.cc new file mode 100644 index 00000000000..eaf9d8dcc13 --- /dev/null +++ b/gcc/brig/brigfrontend/brig-queue-inst-handler.cc @@ -0,0 +1,93 @@ +/* brig-queue-inst-handler.cc -- brig user mode queue related instruction + handling + Copyright (C) 2016 Free Software Foundation, Inc. + Contributed by Pekka Jaaskelainen <pekka.jaaskelainen@parmance.com> + for General Processor Tech. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +#include <sstream> + +#include "brig-code-entry-handler.h" +#include "brig-util.h" +#include "convert.h" +#include "tree-pretty-print.h" +#include "errors.h" +#include "diagnostic-core.h" +#include "brig-builtins.h" + +brig_queue_inst_handler::brig_queue_inst_handler (brig_to_generic &parent) + : brig_code_entry_handler (parent) +{ +} + +size_t +brig_queue_inst_handler::operator () (const BrigBase *base) +{ + const BrigInstBase &inst_base = *(const BrigInstBase *) base; + + tree_stl_vec operands = build_operands (inst_base); + + if (inst_base.opcode == BRIG_OPCODE_LDQUEUEWRITEINDEX + || inst_base.opcode == BRIG_OPCODE_LDQUEUEREADINDEX) + { + tree builtin + = inst_base.opcode == BRIG_OPCODE_LDQUEUEWRITEINDEX + ? builtin_decl_explicit (BUILT_IN_HSAIL_LDQUEUEWRITEINDEX) + : builtin_decl_explicit (BUILT_IN_HSAIL_LDQUEUEREADINDEX); + + tree expr + = call_builtin (builtin, 1, uint64_type_node, + uint64_type_node, operands[1]); + build_output_assignment (inst_base, operands[0], expr); + } + else if (inst_base.opcode == BRIG_OPCODE_STQUEUEWRITEINDEX + || inst_base.opcode == BRIG_OPCODE_STQUEUEREADINDEX) + { + tree builtin + = inst_base.opcode == BRIG_OPCODE_STQUEUEWRITEINDEX + ? builtin_decl_explicit (BUILT_IN_HSAIL_STQUEUEWRITEINDEX) + : builtin_decl_explicit (BUILT_IN_HSAIL_STQUEUEREADINDEX); + + call_builtin (builtin, 2, void_type_node, + uint64_type_node, operands[0], uint64_type_node, + operands[1]); + } + else if (inst_base.opcode == BRIG_OPCODE_ADDQUEUEWRITEINDEX) + { + tree builtin = builtin_decl_explicit (BUILT_IN_HSAIL_ADDQUEUEWRITEINDEX); + + tree expr = call_builtin (builtin, 2, + uint64_type_node, uint64_type_node, operands[1], + uint64_type_node, operands[2]); + build_output_assignment (inst_base, operands[0], expr); + } + else if (inst_base.opcode == BRIG_OPCODE_CASQUEUEWRITEINDEX) + { + tree builtin = builtin_decl_explicit (BUILT_IN_HSAIL_CASQUEUEWRITEINDEX); + + tree expr + = call_builtin (builtin, 3, uint64_type_node, + uint64_type_node, operands[1], uint64_type_node, + operands[2], uint64_type_node, operands[3]); + build_output_assignment (inst_base, operands[0], expr); + } + else + gcc_unreachable (); + + return base->byteCount; +} diff --git a/gcc/brig/brigfrontend/brig-seg-inst-handler.cc b/gcc/brig/brigfrontend/brig-seg-inst-handler.cc new file mode 100644 index 00000000000..e6801146cc7 --- /dev/null +++ b/gcc/brig/brigfrontend/brig-seg-inst-handler.cc @@ -0,0 +1,146 @@ +/* brig-seg-inst-handler.cc -- brig segment related instruction handling + Copyright (C) 2016 Free Software Foundation, Inc. + Contributed by Pekka Jaaskelainen <pekka.jaaskelainen@parmance.com> + for General Processor Tech. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +#include <sstream> + +#include "brig-code-entry-handler.h" +#include "brig-util.h" +#include "convert.h" +#include "tree-pretty-print.h" +#include "errors.h" +#include "diagnostic-core.h" + +brig_seg_inst_handler::brig_seg_inst_handler (brig_to_generic &parent) + : brig_code_entry_handler (parent) +{ +} + +size_t +brig_seg_inst_handler::operator () (const BrigBase *base) +{ + const BrigInstBase &inst_base = *(const BrigInstBase *) base; + + std::vector<tree> operands = build_operands (inst_base); + + tree expr = NULL_TREE; + + if (inst_base.opcode == BRIG_OPCODE_STOF) + { + const BrigInstSegCvt &inst = *(const BrigInstSegCvt *) base; + + if (inst.segment == BRIG_SEGMENT_GROUP) + expr = build2 (PLUS_EXPR, size_type_node, + convert_to_integer (size_type_node, + m_parent.m_cf->m_group_base_arg), + convert_to_integer (size_type_node, operands[1])); + else if (inst.segment == BRIG_SEGMENT_PRIVATE + || inst.segment == BRIG_SEGMENT_SPILL) + expr = build2 (PLUS_EXPR, size_type_node, + convert_to_integer (size_type_node, + m_parent.m_cf->m_private_base_arg), + convert_to_integer (size_type_node, operands[1])); + else + gcc_unreachable (); + + if (!(inst.modifier & BRIG_SEG_CVT_NONULL)) + { + /* Need to convert the null value. -1 is used for 32b segments, + and 0 for flat/global. */ + tree cmp + = build2 (EQ_EXPR, uint32_type_node, + build_int_cstu (uint32_type_node, -1), operands[1]); + + tree null_check = build3 (COND_EXPR, size_type_node, cmp, + build_int_cstu (size_type_node, 0), expr); + + expr = null_check; + } + } + else if (inst_base.opcode == BRIG_OPCODE_FTOS) + { + const BrigInstSegCvt &inst = *(const BrigInstSegCvt *) base; + + if (inst.segment == BRIG_SEGMENT_GROUP) + expr = build2 (MINUS_EXPR, size_type_node, + convert_to_integer (size_type_node, + m_parent.m_cf->m_group_base_arg), + convert_to_integer (size_type_node, operands[1])); + else if (inst.segment == BRIG_SEGMENT_PRIVATE) + expr = build2 (MINUS_EXPR, size_type_node, + convert_to_integer (size_type_node, + m_parent.m_cf->m_private_base_arg), + convert_to_integer (size_type_node, operands[1])); + else + gcc_unreachable (); + + if (!(inst.modifier & BRIG_SEG_CVT_NONULL)) + { + /* Need to convert the null value. -1 is used for 32b segments, + and 0 for flat/global. */ + tree cmp = build2 (EQ_EXPR, size_type_node, + build_int_cstu (size_type_node, 0), operands[1]); + + tree null_check + = build3 (COND_EXPR, size_type_node, cmp, + build_int_cstu (uint32_type_node, -1), expr); + expr = null_check; + } + } + else if (inst_base.opcode == BRIG_OPCODE_NULLPTR) + { + const BrigInstSeg &inst = *(const BrigInstSeg *) base; + if (inst.segment == BRIG_SEGMENT_GLOBAL + || inst.segment == BRIG_SEGMENT_FLAT + || inst.segment == BRIG_SEGMENT_READONLY) + expr = build_int_cstu (uint64_type_node, 0); + else + expr = build_int_cstu (uint32_type_node, -1); + } + else if (inst_base.opcode == BRIG_OPCODE_SEGMENTP) + { + const BrigInstSegCvt &inst = *(const BrigInstSegCvt *) base; + + tree builtin = NULL_TREE; + switch (inst.segment) + { + case BRIG_SEGMENT_GLOBAL: + builtin = builtin_decl_explicit (BUILT_IN_HSAIL_SEGMENTP_GLOBAL); + break; + case BRIG_SEGMENT_GROUP: + builtin = builtin_decl_explicit (BUILT_IN_HSAIL_SEGMENTP_GROUP); + break; + case BRIG_SEGMENT_PRIVATE: + builtin = builtin_decl_explicit (BUILT_IN_HSAIL_SEGMENTP_PRIVATE); + break; + default: + gcc_unreachable (); + } + + expr = call_builtin (builtin, 2, + uint32_type_node, uint64_type_node, operands[1], + ptr_type_node, m_parent.m_cf->m_context_arg); + } + else + gcc_unreachable (); + + build_output_assignment (inst_base, operands[0], expr); + return base->byteCount; +} diff --git a/gcc/brig/brigfrontend/brig-signal-inst-handler.cc b/gcc/brig/brigfrontend/brig-signal-inst-handler.cc new file mode 100644 index 00000000000..5dd2268a9ce --- /dev/null +++ b/gcc/brig/brigfrontend/brig-signal-inst-handler.cc @@ -0,0 +1,42 @@ +/* brig-signal-inst-handler.cc -- brig signal instruction handling + Copyright (C) 2016 Free Software Foundation, Inc. + Contributed by Pekka Jaaskelainen <pekka.jaaskelainen@parmance.com> + for General Processor Tech. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +#include <sstream> + +#include "brig-code-entry-handler.h" +#include "brig-util.h" +#include "fold-const.h" +#include "diagnostic.h" +#include "tree-pretty-print.h" +#include "print-tree.h" +#include "convert.h" +#include "langhooks.h" +#include "gimple-expr.h" + +size_t +brig_signal_inst_handler::operator () (const BrigBase *base) +{ + const BrigInstSignal *inst = (const BrigInstSignal *) base; + BrigAtomicOperation8_t atomic_opcode; + atomic_opcode = inst->signalOperation; + + return generate_tree (inst->base, atomic_opcode); +} diff --git a/gcc/brig/brigfrontend/brig-to-generic.cc b/gcc/brig/brigfrontend/brig-to-generic.cc new file mode 100644 index 00000000000..d3a67864179 --- /dev/null +++ b/gcc/brig/brigfrontend/brig-to-generic.cc @@ -0,0 +1,796 @@ +/* brig2tree.cc -- brig to gcc generic/gimple tree conversion + Copyright (C) 2016 Free Software Foundation, Inc. + Contributed by Pekka Jaaskelainen <pekka.jaaskelainen@parmance.com> + for General Processor Tech. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation; either version 3, or (at your option) any later + version. + + GCC is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. */ + +#include <cassert> +#include <iostream> +#include <iomanip> +#include <sstream> + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "target.h" +#include "function.h" +#include "brig-to-generic.h" +#include "stringpool.h" +#include "tree-iterator.h" +#include "toplev.h" +#include "gimplify.h" +#include "gimple-expr.h" +#include "print-tree.h" +#include "hsa-brig-format.h" +#include "stor-layout.h" +#include "diagnostic-core.h" +#include "brig-code-entry-handler.h" +#include "brig-machine.h" +#include "brig-util.h" +#include "phsa.h" +#include "tree-pretty-print.h" +#include "dumpfile.h" +#include "tree-cfg.h" +#include "errors.h" +#include "fold-const.h" +#include "cgraph.h" +#include "dumpfile.h" +#include "tree-pretty-print.h" + +extern int gccbrig_verbose; + +tree brig_to_generic::s_fp16_type; +tree brig_to_generic::s_fp32_type; +tree brig_to_generic::s_fp64_type; + +brig_to_generic::brig_to_generic () + : m_cf (NULL), m_brig (NULL), m_next_group_offset (0), + m_next_private_offset (0) +{ + m_globals = NULL_TREE; + + /* Initialize the basic REAL types. + This doesn't work straight away because most of the targets + do not support fp16 natively. Let's by default convert + to fp32 and back before and after each instruction (handle it as + a storage format only), and later add an optimization pass + that removes the extra converts (in case of multiple fp16 ops + in a row). */ + s_fp16_type = make_node (REAL_TYPE); + TYPE_PRECISION (s_fp16_type) = 16; + TYPE_SIZE (s_fp16_type) = bitsize_int (16); + TYPE_SIZE_UNIT (s_fp16_type) = size_int (2); + SET_TYPE_ALIGN (s_fp16_type, 16); + layout_type (s_fp16_type); + + s_fp32_type = gccbrig_tree_type_for_hsa_type (BRIG_TYPE_F32); + s_fp64_type = gccbrig_tree_type_for_hsa_type (BRIG_TYPE_F64); + + /* TODO: (machine)query the preferred rounding mode that is set by + the machine by default. This can be redefined by each BRIG module + header. */ + m_default_float_rounding_mode = BRIG_ROUND_FLOAT_ZERO; + + m_dump_file = dump_begin (TDI_original, &m_dump_flags); +} + +class unimplemented_entry_handler : public brig_code_entry_handler +{ +public: + unimplemented_entry_handler (brig_to_generic &parent) + : brig_code_entry_handler (parent) + { + } + + size_t + operator () (const BrigBase *base) + { + gcc_unreachable (); + return base->byteCount; + } +}; + +/* Handler for entries that can be (and are) safely skipped for the purposes + of GENERIC generation. */ + +class skipped_entry_handler : public brig_code_entry_handler +{ +public: + skipped_entry_handler (brig_to_generic &parent) + : brig_code_entry_handler (parent) + { + } + + size_t + operator () (const BrigBase *base) + { + return base->byteCount; + } +}; + +/* Parses the given BRIG blob. */ + +void +brig_to_generic::parse (const char *brig_blob) +{ + m_brig = brig_blob; + m_brig_blobs.push_back (brig_blob); + + const BrigModuleHeader *mheader = (const BrigModuleHeader *) brig_blob; + + if (strncmp (mheader->identification, "HSA BRIG", 8) != 0) + fatal_error (UNKNOWN_LOCATION, PHSA_ERROR_PREFIX_INCOMPATIBLE_MODULE + "Unrecognized file format."); + if (mheader->brigMajor != 1 || mheader->brigMinor != 0) + fatal_error (UNKNOWN_LOCATION, PHSA_ERROR_PREFIX_INCOMPATIBLE_MODULE + "BRIG version not supported. BRIG 1.0 required."); + + m_data = m_code = m_operand = NULL; + + /* Find the positions of the different sections. */ + for (uint32_t sec = 0; sec < mheader->sectionCount; ++sec) + { + uint64_t offset + = ((const uint64_t *) (brig_blob + mheader->sectionIndex))[sec]; + + const BrigSectionHeader *section_header + = (const BrigSectionHeader *) (brig_blob + offset); + + std::string name ((const char *) (§ion_header->name), + section_header->nameLength); + + if (sec == BRIG_SECTION_INDEX_DATA && name == "hsa_data") + { + m_data = (const char *) section_header; + m_data_size = section_header->byteCount; + } + else if (sec == BRIG_SECTION_INDEX_CODE && name == "hsa_code") + { + m_code = (const char *) section_header; + m_code_size = section_header->byteCount; + } + else if (sec == BRIG_SECTION_INDEX_OPERAND && name == "hsa_operand") + { + m_operand = (const char *) section_header; + m_operand_size = section_header->byteCount; + } + else + { + gcc_unreachable (); + } + } + + if (m_code == NULL) + gcc_unreachable (); + if (m_data == NULL) + gcc_unreachable (); + if (m_operand == NULL) + gcc_unreachable (); + + brig_basic_inst_handler inst_handler (*this); + brig_branch_inst_handler branch_inst_handler (*this); + brig_cvt_inst_handler cvt_inst_handler (*this); + brig_seg_inst_handler seg_inst_handler (*this); + brig_copy_move_inst_handler copy_move_inst_handler (*this); + brig_signal_inst_handler signal_inst_handler (*this); + brig_atomic_inst_handler atomic_inst_handler (*this); + brig_cmp_inst_handler cmp_inst_handler (*this); + brig_mem_inst_handler mem_inst_handler (*this); + brig_inst_mod_handler inst_mod_handler (*this); + brig_directive_label_handler label_handler (*this); + brig_directive_variable_handler var_handler (*this); + brig_directive_fbarrier_handler fbar_handler (*this); + brig_directive_comment_handler comment_handler (*this); + brig_directive_function_handler func_handler (*this); + brig_directive_control_handler control_handler (*this); + brig_directive_arg_block_handler arg_block_handler (*this); + brig_directive_module_handler module_handler (*this); + brig_lane_inst_handler lane_inst_handler (*this); + brig_queue_inst_handler queue_inst_handler (*this); + skipped_entry_handler skipped_handler (*this); + unimplemented_entry_handler unimplemented_handler (*this); + + struct code_entry_handler_info + { + BrigKind kind; + brig_code_entry_handler *handler; + }; + + /* TODO: Convert to a hash table / map. For now, put the more common + entries to the top to keep the scan fast on average. */ + code_entry_handler_info handlers[] + = {{BRIG_KIND_INST_BASIC, &inst_handler}, + {BRIG_KIND_INST_CMP, &cmp_inst_handler}, + {BRIG_KIND_INST_MEM, &mem_inst_handler}, + {BRIG_KIND_INST_MOD, &inst_mod_handler}, + {BRIG_KIND_INST_CVT, &cvt_inst_handler}, + {BRIG_KIND_INST_SEG_CVT, &seg_inst_handler}, + {BRIG_KIND_INST_SEG, &seg_inst_handler}, + {BRIG_KIND_INST_ADDR, ©_move_inst_handler}, + {BRIG_KIND_INST_SOURCE_TYPE, ©_move_inst_handler}, + {BRIG_KIND_INST_ATOMIC, &atomic_inst_handler}, + {BRIG_KIND_INST_SIGNAL, &signal_inst_handler}, + {BRIG_KIND_INST_BR, &branch_inst_handler}, + {BRIG_KIND_INST_LANE, &lane_inst_handler}, + {BRIG_KIND_INST_QUEUE, &queue_inst_handler}, + /* Assuming fences are not needed. FIXME: call builtins + when porting to a platform where they are. */ + {BRIG_KIND_INST_MEM_FENCE, &skipped_handler}, + {BRIG_KIND_DIRECTIVE_LABEL, &label_handler}, + {BRIG_KIND_DIRECTIVE_VARIABLE, &var_handler}, + {BRIG_KIND_DIRECTIVE_ARG_BLOCK_START, &arg_block_handler}, + {BRIG_KIND_DIRECTIVE_ARG_BLOCK_END, &arg_block_handler}, + {BRIG_KIND_DIRECTIVE_FBARRIER, &fbar_handler}, + {BRIG_KIND_DIRECTIVE_COMMENT, &comment_handler}, + {BRIG_KIND_DIRECTIVE_KERNEL, &func_handler}, + {BRIG_KIND_DIRECTIVE_SIGNATURE, &func_handler}, + {BRIG_KIND_DIRECTIVE_FUNCTION, &func_handler}, + {BRIG_KIND_DIRECTIVE_INDIRECT_FUNCTION, &func_handler}, + {BRIG_KIND_DIRECTIVE_MODULE, &module_handler}, + /* Skipping debug locations for now as not needed for conformance. */ + {BRIG_KIND_DIRECTIVE_LOC, &skipped_handler}, + /* There are no supported pragmas at this moment. */ + {BRIG_KIND_DIRECTIVE_PRAGMA, &skipped_handler}, + {BRIG_KIND_DIRECTIVE_CONTROL, &control_handler}, + {BRIG_KIND_DIRECTIVE_EXTENSION, &skipped_handler}}; + + const BrigSectionHeader *csection_header = (const BrigSectionHeader *) m_code; + + for (size_t b = csection_header->headerByteCount; b < m_code_size;) + { + const BrigBase *entry = (const BrigBase *) (m_code + b); + + brig_code_entry_handler *handler = &unimplemented_handler; + + if (m_cf != NULL && b >= m_cf->m_brig_def->nextModuleEntry) + finish_function (); /* The function definition ended. */ + + /* Find a handler. */ + for (size_t i = 0; + i < sizeof (handlers) / sizeof (code_entry_handler_info); ++i) + { + if (handlers[i].kind == entry->kind) + handler = handlers[i].handler; + } + b += (*handler) (entry); + continue; + } + + finish_function (); +} + +const BrigData * +brig_to_generic::get_brig_data_entry (size_t entry_offset) const +{ + return (const BrigData *) (m_data + entry_offset); +} + +const BrigBase * +brig_to_generic::get_brig_operand_entry (size_t entry_offset) const +{ + return (const BrigBase *) (m_operand + entry_offset); +} + +const BrigBase * +brig_to_generic::get_brig_code_entry (size_t entry_offset) const +{ + return (const BrigBase *) (m_code + entry_offset); +} + +void +brig_to_generic::append_global (tree g) +{ + if (m_globals == NULL_TREE) + { + m_globals = g; + return; + } + else + { + tree last = tree_last (m_globals); + TREE_CHAIN (last) = g; + } +} + +tree +brig_to_generic::global_variable (const std::string &name) const +{ + label_index::const_iterator i = m_global_variables.find (name); + if (i == m_global_variables.end ()) + return NULL_TREE; + else + return (*i).second; +} + +/* Returns a function declaration with the given name. Assumes it has been + created previously via a DirectiveFunction or similar. */ + +tree +brig_to_generic::function_decl (const std::string &name) +{ + label_index::const_iterator i = m_function_index.find (name); + if (i == m_function_index.end ()) + return NULL_TREE; + return (*i).second; +} + +void +brig_to_generic::add_function_decl (const std::string &name, tree func_decl) +{ + m_function_index[name] = func_decl; +} + +/* Adds a GENERIC global variable VAR_DECL with the given NAME to the + current module. If we have generated a host def var ptr (a place holder + for variables that are defined by the HSA host code) for this global + variable definition (because there was a declaration earlier which looked + like it might have been a host defined variable), we now have + to assign its address and make it private to allow the references to + point to the defined variable instead. */ + +void +brig_to_generic::add_global_variable (const std::string &name, tree var_decl) +{ + append_global (var_decl); + m_global_variables[name] = var_decl; + + std::string host_def_var_name + = std::string (PHSA_HOST_DEF_PTR_PREFIX) + name; + tree host_def_var = global_variable (host_def_var_name.c_str ()); + if (host_def_var == NULL_TREE) + return; + + tree ptype = build_pointer_type (TREE_TYPE (var_decl)); + tree var_addr = build1 (ADDR_EXPR, ptype, var_decl); + + DECL_INITIAL (host_def_var) = var_addr; + TREE_PUBLIC (host_def_var) = 0; +} + +/* Adds an indirection pointer for a potential host-defined program scope + variable declaration. */ + +void +brig_to_generic::add_host_def_var_ptr (const std::string &name, tree var_decl) +{ + std::string var_name = std::string (PHSA_HOST_DEF_PTR_PREFIX) + name; + + tree name_identifier = get_identifier (var_name.c_str ()); + + tree ptr_var = build_decl (UNKNOWN_LOCATION, VAR_DECL, name_identifier, + build_pointer_type (TREE_TYPE (var_decl))); + DECL_EXTERNAL (ptr_var) = 0; + DECL_ARTIFICIAL (ptr_var) = 0; + + TREE_PUBLIC (ptr_var) = 1; + TREE_USED (ptr_var) = 1; + TREE_ADDRESSABLE (ptr_var) = 1; + TREE_STATIC (ptr_var) = 1; + + append_global (ptr_var); + m_global_variables[var_name] = ptr_var; +} + +/* Produce a "mangled name" for the given brig function or kernel. + The mangling is used to make unique global symbol name in case of + module scope functions. Program scope functions are not mangled + (except for dropping the leading &), which makes the functions + directly visible for linking using the original function name. */ + +std::string +brig_to_generic::get_mangled_name +(const BrigDirectiveExecutable *func) const +{ + /* Strip the leading &. */ + std::string func_name = get_string (func->name).substr (1); + if (func->linkage == BRIG_LINKAGE_MODULE) + { + /* Mangle the module scope function names with the module name and + make them public so they can be queried by the HSA runtime from + the produced binary. Assume it's the currently processed function + we are always referring to. */ + func_name = "gccbrig." + m_module_name + "." + func_name; + } + return func_name; +} + +std::string +brig_to_generic::get_string (size_t entry_offset) const +{ + const BrigData *data_item = get_brig_data_entry (entry_offset); + return std::string ((const char *) &data_item->bytes, data_item->byteCount); +} + +/* Adapted from c-semantics.c. */ + +tree +build_stmt (enum tree_code code, ...) +{ + tree ret; + int length, i; + va_list p; + bool side_effects; + + /* This function cannot be used to construct variably-sized nodes. */ + gcc_assert (TREE_CODE_CLASS (code) != tcc_vl_exp); + + va_start (p, code); + + ret = make_node (code); + TREE_TYPE (ret) = void_type_node; + length = TREE_CODE_LENGTH (code); + + /* TREE_SIDE_EFFECTS will already be set for statements with + implicit side effects. Here we make sure it is set for other + expressions by checking whether the parameters have side + effects. */ + + side_effects = false; + for (i = 0; i < length; i++) + { + tree t = va_arg (p, tree); + if (t && !TYPE_P (t)) + side_effects |= TREE_SIDE_EFFECTS (t); + TREE_OPERAND (ret, i) = t; + } + + TREE_SIDE_EFFECTS (ret) |= side_effects; + + va_end (p); + return ret; +} + +/* BRIG regs are untyped, but GENERIC is not. We need to add implicit casts + in case treating the operand with an instruction with a type different + than the created reg var type in order to select correct instruction type + later on. This function creates the necessary reinterpret type cast from + a source variable to the destination type. In case no cast is needed to + the same type, SOURCE is returned directly. */ + +tree +build_reinterpret_cast (tree destination_type, tree source) +{ + + gcc_assert (source && destination_type && TREE_TYPE (source) != NULL_TREE + && destination_type != NULL_TREE); + + tree source_type = TREE_TYPE (source); + if (TREE_CODE (source) == CALL_EXPR) + { + tree func_decl = TREE_OPERAND (TREE_OPERAND (source, 1), 0); + source_type = TREE_TYPE (TREE_TYPE (func_decl)); + } + + if (destination_type == source_type) + return source; + + size_t src_size = int_size_in_bytes (source_type); + size_t dst_size = int_size_in_bytes (destination_type); + if (src_size == dst_size) + return build1 (VIEW_CONVERT_EXPR, destination_type, source); + else if (src_size < dst_size) + { + /* The src_size can be smaller at least with f16 scalars which are + stored to 32b register variables. First convert to an equivalent + size unsigned type, then extend to an unsigned type of the + target width, after which VIEW_CONVERT_EXPR can be used to + force to the target type. */ + tree unsigned_temp = build1 (VIEW_CONVERT_EXPR, + get_unsigned_int_type (source_type), + source); + return build1 (VIEW_CONVERT_EXPR, destination_type, + convert (get_unsigned_int_type (destination_type), + unsigned_temp)); + } + else + gcc_unreachable (); + return NULL_TREE; +} + +/* Returns the finished brig_function for the given generic FUNC_DECL, + or NULL, if not found. */ + +brig_function * +brig_to_generic::get_finished_function (tree func_decl) +{ + std::string func_name + = identifier_to_locale (IDENTIFIER_POINTER (DECL_NAME (func_decl))); + std::map<std::string, brig_function *>::iterator i + = m_finished_functions.find (func_name); + if (i != m_finished_functions.end ()) + return (*i).second; + else + return NULL; +} + +/* Finalizes the currently handled function. Should be called before + setting a new function. */ + +void +brig_to_generic::finish_function () +{ + if (m_cf == NULL || m_cf->m_func_decl == NULL_TREE) + { + /* It can be a finished func declaration fingerprint, in that case we + don't have m_func_decl. */ + m_cf = NULL; + return; + } + + if (!m_cf->m_is_kernel) + { + tree bind_expr = m_cf->m_current_bind_expr; + tree stmts = BIND_EXPR_BODY (bind_expr); + m_cf->finish (); + m_cf->emit_metadata (stmts); + dump_function (m_dump_file, m_cf); + gimplify_function_tree (m_cf->m_func_decl); + cgraph_node::finalize_function (m_cf->m_func_decl, true); + } + else + /* Emit the kernel only at the very end so we can analyze the total + group and private memory usage. */ + m_kernels.push_back (m_cf); + + pop_cfun (); + + m_finished_functions[m_cf->m_name] = m_cf; + m_cf = NULL; +} + +/* Initializes a new currently handled function. */ + +void +brig_to_generic::start_function (tree f) +{ + if (DECL_STRUCT_FUNCTION (f) == NULL) + push_struct_function (f); + else + push_cfun (DECL_STRUCT_FUNCTION (f)); + + m_cf->m_func_decl = f; +} + +/* Appends a new group variable (or an fbarrier) to the current kernel's + group segment. */ + +void +brig_to_generic::append_group_variable (const std::string &name, size_t size, + size_t alignment) +{ + size_t align_padding = m_next_group_offset % alignment == 0 ? + 0 : (alignment - m_next_group_offset % alignment); + m_next_group_offset += align_padding; + m_group_offsets[name] = m_next_group_offset; + m_next_group_offset += size; +} + +size_t +brig_to_generic::group_variable_segment_offset (const std::string &name) const +{ + var_offset_table::const_iterator i = m_group_offsets.find (name); + gcc_assert (i != m_group_offsets.end ()); + return (*i).second; +} + +/* The size of the group and private segments required by the currently + processed kernel. Private segment size must be multiplied by the + number of work-items in the launch, in case of a work-group function. */ + +size_t +brig_to_generic::group_segment_size () const +{ + return m_next_group_offset; +} + +/* Appends a new group variable to the current kernel's private segment. */ + +void +brig_to_generic::append_private_variable (const std::string &name, + size_t size, size_t alignment) +{ + size_t align_padding = m_next_private_offset % alignment == 0 ? + 0 : (alignment - m_next_private_offset % alignment); + m_next_private_offset += align_padding; + m_private_offsets[name] = m_next_private_offset; + m_next_private_offset += size; + m_private_data_sizes[name] = size + align_padding; +} + +size_t +brig_to_generic::private_variable_segment_offset + (const std::string &name) const +{ + var_offset_table::const_iterator i = m_private_offsets.find (name); + gcc_assert (i != m_private_offsets.end ()); + return (*i).second; +} + +bool +brig_to_generic::has_private_variable (const std::string &name) const +{ + std::map<std::string, size_t>::const_iterator i + = m_private_data_sizes.find (name); + return i != m_private_data_sizes.end (); +} + +bool +brig_to_generic::has_group_variable (const std::string &name) const +{ + var_offset_table::const_iterator i = m_group_offsets.find (name); + return i != m_group_offsets.end (); +} + +size_t +brig_to_generic::private_variable_size (const std::string &name) const +{ + std::map<std::string, size_t>::const_iterator i + = m_private_data_sizes.find (name); + gcc_assert (i != m_private_data_sizes.end ()); + return (*i).second; +} + +size_t +brig_to_generic::private_segment_size () const +{ + return m_next_private_offset; +} + +/* Cached builtins indexed by name. */ + +typedef std::map<std::string, tree> builtin_index; +builtin_index builtin_cache_; + +/* Build a call to a builtin function. PDECL is the builtin function to + call. NARGS is the number of input arguments, RETTYPE the built-in + functions return value type, and ... is the list of arguments passed to + the call with type first, then the value. */ + +tree +call_builtin (tree pdecl, int nargs, tree rettype, ...) +{ + if (rettype == error_mark_node) + return error_mark_node; + + tree *types = new tree[nargs]; + tree *args = new tree[nargs]; + + va_list ap; + va_start (ap, rettype); + for (int i = 0; i < nargs; ++i) + { + types[i] = va_arg (ap, tree); + tree arg = va_arg (ap, tree); + args[i] = build_reinterpret_cast (types[i], arg); + if (types[i] == error_mark_node || args[i] == error_mark_node) + { + delete[] types; + delete[] args; + return error_mark_node; + } + } + va_end (ap); + + tree fnptr = build_fold_addr_expr (pdecl); + + tree ret = build_call_array (rettype, fnptr, nargs, args); + + delete[] types; + delete[] args; + + return ret; +} + +/* Generate all global declarations. Should be called after the last + BRIG has been fed in. */ + +void +brig_to_generic::write_globals () +{ + /* Now that the whole BRIG module has been processed, build a launcher + and a metadata section for each built kernel. */ + for (size_t i = 0; i < m_kernels.size (); ++i) + { + brig_function *f = m_kernels[i]; + + /* Finish kernels now that we know the call graphs and their barrier + usage. */ + f->finish_kernel (); + + dump_function (m_dump_file, f); + gimplify_function_tree (f->m_func_decl); + cgraph_node::finalize_function (f->m_func_decl, true); + + f->m_descriptor.is_kernel = 1; + /* TODO: analyze the kernel's actual group and private segment usage + using a call graph. Now the private and group mem sizes are overly + pessimistic in case of multiple kernels in the same module. */ + f->m_descriptor.group_segment_size = group_segment_size (); + f->m_descriptor.private_segment_size = private_segment_size (); + + /* The kernarg size is rounded up to a multiple of 16 according to + the PRM specs. */ + f->m_descriptor.kernarg_segment_size = f->m_next_kernarg_offset; + if (f->m_descriptor.kernarg_segment_size % 16 > 0) + f->m_descriptor.kernarg_segment_size + += 16 - f->m_next_kernarg_offset % 16; + f->m_descriptor.kernarg_max_align = f->m_kernarg_max_align; + + tree launcher = f->emit_launcher_and_metadata (); + + append_global (launcher); + + gimplify_function_tree (launcher); + cgraph_node::finalize_function (launcher, true); + pop_cfun (); + } + + int no_globals = list_length (m_globals); + tree *vec = new tree[no_globals]; + + int i = 0; + tree global = m_globals; + while (global) + { + vec[i] = global; + ++i; + global = TREE_CHAIN (global); + } + + wrapup_global_declarations (vec, no_globals); + + delete[] vec; + + for (size_t i = 0; i < m_brig_blobs.size (); ++i) + delete m_brig_blobs[i]; +} + +/* Returns an type with unsigned int elements corresponding to the + size and element count of ORIGINAL_TYPE. */ + +tree +get_unsigned_int_type (tree original_type) +{ + if (VECTOR_TYPE_P (original_type)) + { + size_t esize + = int_size_in_bytes (TREE_TYPE (original_type)) * BITS_PER_UNIT; + size_t ecount = TYPE_VECTOR_SUBPARTS (original_type); + return build_vector_type (build_nonstandard_integer_type (esize, true), + ecount); + } + else + return build_nonstandard_integer_type (int_size_in_bytes (original_type) + * BITS_PER_UNIT, + true); +} + +void +dump_function (FILE *dump_file, brig_function *f) +{ + /* Dump the BRIG-specific tree IR. */ + if (dump_file) + { + fprintf (dump_file, "\n;; Function %s", f->m_name.c_str ()); + fprintf (dump_file, "\n;; enabled by -%s\n\n", + dump_flag_name (TDI_original)); + print_generic_decl (dump_file, f->m_func_decl, 0); + print_generic_expr (dump_file, f->m_current_bind_expr, 0); + fprintf (dump_file, "\n"); + } +} diff --git a/gcc/brig/brigfrontend/brig-to-generic.h b/gcc/brig/brigfrontend/brig-to-generic.h new file mode 100644 index 00000000000..8e15589754b --- /dev/null +++ b/gcc/brig/brigfrontend/brig-to-generic.h @@ -0,0 +1,225 @@ +/* brig-to-generic.h -- brig to gcc generic conversion + Copyright (C) 2016 Free Software Foundation, Inc. + Contributed by Pekka Jaaskelainen <pekka.jaaskelainen@parmance.com> + for General Processor Tech. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation; either version 3, or (at your option) any later + version. + + GCC is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. */ + +#ifndef BRIG_TO_GENERIC_H +#define BRIG_TO_GENERIC_H + +#include <string> +#include <map> +#include <vector> + +#include "config.h" +#include "system.h" +#include "ansidecl.h" +#include "coretypes.h" +#include "opts.h" +#include "tree.h" +#include "tree-iterator.h" +#include "hsa-brig-format.h" +#include "brig-function.h" + + +struct reg_decl_index_entry; + +/* Converts an HSAIL BRIG input to GENERIC. This class holds global state + for the translation process. Handling of the smaller pieces of BRIG data + is delegated to various handler classes declared in + brig-code-entry-handlers.h. */ + +class brig_to_generic +{ +public: + typedef std::map<const BrigDirectiveVariable *, tree> variable_index; + +private: + typedef std::map<std::string, size_t> var_offset_table; + typedef std::map<const BrigBase *, std::string> name_index; + +public: + brig_to_generic (); + + void parse (const char *brig_blob); + + void write_globals (); + + std::string get_string (size_t entry_offset) const; + + const BrigData *get_brig_data_entry (size_t entry_offset) const; + const BrigBase *get_brig_operand_entry (size_t entry_offset) const; + const BrigBase *get_brig_code_entry (size_t entry_offset) const; + + void append_global (tree g); + + tree function_decl (const std::string &name); + void add_function_decl (const std::string &name, tree func_decl); + + tree global_variable (const std::string &name) const; + void add_global_variable (const std::string &name, tree var_decl); + void add_host_def_var_ptr (const std::string &name, tree var_decl); + + void start_function (tree f); + void finish_function (); + + void append_group_variable (const std::string &name, size_t size, + size_t alignment); + + void append_private_variable (const std::string &name, size_t size, + size_t alignment); + + size_t group_variable_segment_offset (const std::string &name) const; + + bool + has_group_variable (const std::string &name) const; + + size_t + private_variable_segment_offset (const std::string &name) const; + + bool + has_private_variable (const std::string &name) const; + + size_t private_variable_size (const std::string &name) const; + + template <typename T> + std::string + get_mangled_name_tmpl (const T *brigVar) const; + + std::string get_mangled_name (const BrigDirectiveFbarrier *fbar) const + { return get_mangled_name_tmpl (fbar); } + std::string get_mangled_name (const BrigDirectiveVariable *var) const + { return get_mangled_name_tmpl (var); } + std::string get_mangled_name (const BrigDirectiveExecutable *func) const; + + size_t group_segment_size () const; + size_t private_segment_size () const; + + brig_function *get_finished_function (tree func_decl); + + static tree s_fp16_type; + static tree s_fp32_type; + static tree s_fp64_type; + + /* The default rounding mode that should be used for float instructions. + This can be set in each BRIG module header. */ + BrigRound8_t m_default_float_rounding_mode; + + /* The currently built function. */ + brig_function *m_cf; + + /* The name of the currently handled BRIG module. */ + std::string m_module_name; + +private: + /* The BRIG blob and its different sections of the file currently being + parsed. */ + const char *m_brig; + const char *m_data; + size_t m_data_size; + const char *m_operand; + size_t m_operand_size; + const char *m_code; + size_t m_code_size; + + tree m_globals; + + label_index m_global_variables; + + /* The size of each private variable, including the alignment padding. */ + std::map<std::string, size_t> m_private_data_sizes; + + /* The same for group variables. */ + size_t m_next_group_offset; + var_offset_table m_group_offsets; + + /* And private. */ + size_t m_next_private_offset; + var_offset_table m_private_offsets; + + /* Name index for declared functions. */ + label_index m_function_index; + + /* Stores all processed kernels in order. */ + std::vector<brig_function *> m_kernels; + + /* Stores all already processed functions from the translation unit + for some interprocedural analysis. */ + std::map<std::string, brig_function *> m_finished_functions; + + /* The parsed BRIG blobs. Owned and will be deleted after use. */ + std::vector<const char *> m_brig_blobs; + + /* The original dump file. */ + FILE *m_dump_file; + + /* The original dump file flags. */ + int m_dump_flags; +}; + +/* Produce a "mangled name" for the given brig variable. The mangling is used + to make unique global symbol names for module and function scope variables. + The templated version is suitable for most of the variable types. Functions + and kernels (BrigDirectiveExecutable) are handled with a specialized + get_mangled_name() version. */ + +template <typename T> +std::string +brig_to_generic::get_mangled_name_tmpl (const T *brigVar) const +{ + std::string var_name = get_string (brigVar->name).substr (1); + + /* Mangle the variable name using the function name and the module name + in case of a function scope variable. */ + if (m_cf != NULL + && m_cf->has_function_scope_var (&brigVar->base)) + var_name = m_cf->m_name + "." + var_name; + + if (brigVar->linkage == BRIG_LINKAGE_MODULE) + var_name = "gccbrig." + m_module_name + "." + var_name; + return var_name; +} + +/* An interface to organize the different types of BRIG element handlers. */ + +class brig_entry_handler +{ +public: + brig_entry_handler (brig_to_generic &parent) : m_parent (parent) + { + } + + /* Handles the brig_code data at the given pointer and adds it to the + currently built tree. Returns the number of consumed bytes; */ + virtual size_t operator () (const BrigBase *base) = 0; + +protected: + brig_to_generic &m_parent; +}; + +tree call_builtin (tree pdecl, int nargs, tree rettype, ...); + +tree build_reinterpret_cast (tree destination_type, tree source); + +tree build_stmt (enum tree_code code, ...); + +tree get_unsigned_int_type (tree type); + +void dump_function (FILE *dump_file, brig_function *f); + +#endif diff --git a/gcc/brig/brigfrontend/brig-util.cc b/gcc/brig/brigfrontend/brig-util.cc new file mode 100644 index 00000000000..8b17b59d288 --- /dev/null +++ b/gcc/brig/brigfrontend/brig-util.cc @@ -0,0 +1,447 @@ +/* brig-util.cc -- gccbrig utility functions + Copyright (C) 2016 Free Software Foundation, Inc. + Contributed by Pekka Jaaskelainen <pekka.jaaskelainen@parmance.com> + for General Processor Tech. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +#include <sstream> + +#include "stdint.h" +#include "hsa-brig-format.h" +#include "brig-util.h" +#include "errors.h" +#include "diagnostic-core.h" + +/* Return true if operand number OPNUM of instruction with OPCODE is an output. + False if it is an input. Some code reused from Martin Jambor's gcc-hsa + tree. */ + +bool +gccbrig_hsa_opcode_op_output_p (BrigOpcode16_t opcode, int opnum) +{ + switch (opcode) + { + case BRIG_OPCODE_BR: + case BRIG_OPCODE_SBR: + case BRIG_OPCODE_CBR: + case BRIG_OPCODE_ST: + case BRIG_OPCODE_ATOMICNORET: + case BRIG_OPCODE_SIGNALNORET: + case BRIG_OPCODE_INITFBAR: + case BRIG_OPCODE_JOINFBAR: + case BRIG_OPCODE_WAITFBAR: + case BRIG_OPCODE_ARRIVEFBAR: + case BRIG_OPCODE_LEAVEFBAR: + case BRIG_OPCODE_RELEASEFBAR: + case BRIG_OPCODE_DEBUGTRAP: + return false; + default: + return opnum == 0; + } +} + +unsigned +gccbrig_hsa_type_bit_size (BrigType16_t t) +{ + + unsigned pack_type = t & ~BRIG_TYPE_BASE_MASK; + + if (pack_type == BRIG_TYPE_PACK_32) + return 32; + else if (pack_type == BRIG_TYPE_PACK_64) + return 64; + else if (pack_type == BRIG_TYPE_PACK_128) + return 128; + + switch (t) + { + case BRIG_TYPE_NONE: + return 0; + + case BRIG_TYPE_B1: + return 1; + + case BRIG_TYPE_U8: + case BRIG_TYPE_S8: + case BRIG_TYPE_B8: + return 8; + + case BRIG_TYPE_U16: + case BRIG_TYPE_S16: + case BRIG_TYPE_B16: + case BRIG_TYPE_F16: + return 16; + + case BRIG_TYPE_U32: + case BRIG_TYPE_S32: + case BRIG_TYPE_B32: + case BRIG_TYPE_F32: + case BRIG_TYPE_U8X4: + case BRIG_TYPE_U16X2: + case BRIG_TYPE_S8X4: + case BRIG_TYPE_S16X2: + case BRIG_TYPE_F16X2: + case BRIG_TYPE_SIG32: + return 32; + + case BRIG_TYPE_U64: + case BRIG_TYPE_S64: + case BRIG_TYPE_F64: + case BRIG_TYPE_B64: + case BRIG_TYPE_U8X8: + case BRIG_TYPE_U16X4: + case BRIG_TYPE_U32X2: + case BRIG_TYPE_S8X8: + case BRIG_TYPE_S16X4: + case BRIG_TYPE_S32X2: + case BRIG_TYPE_F16X4: + case BRIG_TYPE_F32X2: + case BRIG_TYPE_SIG64: + return 64; + + case BRIG_TYPE_B128: + case BRIG_TYPE_U8X16: + case BRIG_TYPE_U16X8: + case BRIG_TYPE_U32X4: + case BRIG_TYPE_U64X2: + case BRIG_TYPE_S8X16: + case BRIG_TYPE_S16X8: + case BRIG_TYPE_S32X4: + case BRIG_TYPE_S64X2: + case BRIG_TYPE_F16X8: + case BRIG_TYPE_F32X4: + case BRIG_TYPE_F64X2: + return 128; + + default: + printf ("HMM %d %x\n", t, t); + gcc_unreachable (); + } +} + +/* gcc-hsa borrowed code ENDS. */ + +uint64_t +gccbrig_to_uint64_t (const BrigUInt64 &brig_type) +{ + return (uint64_t (brig_type.hi) << 32) | uint64_t (brig_type.lo); +} + +int +gccbrig_reg_size (const BrigOperandRegister *brig_reg) +{ + switch (brig_reg->regKind) + { + case BRIG_REGISTER_KIND_CONTROL: + return 1; + case BRIG_REGISTER_KIND_SINGLE: + return 32; + case BRIG_REGISTER_KIND_DOUBLE: + return 64; + case BRIG_REGISTER_KIND_QUAD: + return 128; + default: + gcc_unreachable (); + break; + } +} + +std::string +gccbrig_reg_name (const BrigOperandRegister *reg) +{ + std::ostringstream strstr; + switch (reg->regKind) + { + case BRIG_REGISTER_KIND_CONTROL: + strstr << 'c'; + break; + case BRIG_REGISTER_KIND_SINGLE: + strstr << 's'; + break; + case BRIG_REGISTER_KIND_DOUBLE: + strstr << 'd'; + break; + case BRIG_REGISTER_KIND_QUAD: + strstr << 'q'; + break; + default: + gcc_unreachable (); + return ""; + } + strstr << reg->regNum; + return strstr.str (); +} + +std::string +gccbrig_type_name (BrigType16_t type) +{ + switch (type) + { + case BRIG_TYPE_U8: + return "u8"; + case BRIG_TYPE_U16: + return "u16"; + case BRIG_TYPE_U32: + return "u32"; + case BRIG_TYPE_U64: + return "u64"; + case BRIG_TYPE_S8: + return "s8"; + case BRIG_TYPE_S16: + return "s16"; + case BRIG_TYPE_S32: + return "s32"; + case BRIG_TYPE_S64: + return "s64"; + default: + gcc_unreachable (); + break; + } +} + +std::string +gccbrig_segment_name (BrigSegment8_t segment) +{ + if (segment == BRIG_SEGMENT_GLOBAL) + return "global"; + else if (segment == BRIG_SEGMENT_GROUP) + return "group"; + else if (segment == BRIG_SEGMENT_PRIVATE) + return "private"; + else + gcc_unreachable (); +} + +bool +gccbrig_is_float_type (BrigType16_t type) +{ + return (type == BRIG_TYPE_F32 || type == BRIG_TYPE_F64 + || type == BRIG_TYPE_F16); +} + +BrigType16_t +gccbrig_tree_type_to_hsa_type (tree tree_type) +{ + if (INTEGRAL_TYPE_P (tree_type)) + { + if (TYPE_UNSIGNED (tree_type)) + { + switch (int_size_in_bytes (tree_type)) + { + case 1: + return BRIG_TYPE_U8; + case 2: + return BRIG_TYPE_U16; + case 4: + return BRIG_TYPE_U32; + case 8: + return BRIG_TYPE_U64; + default: + break; + } + } + else + { + switch (int_size_in_bytes (tree_type)) + { + case 1: + return BRIG_TYPE_S8; + case 2: + return BRIG_TYPE_S16; + case 4: + return BRIG_TYPE_S32; + case 8: + return BRIG_TYPE_S64; + default: + break; + } + } + } + else if (VECTOR_TYPE_P (tree_type)) + { + tree element_type = TREE_TYPE (tree_type); + size_t element_size = int_size_in_bytes (element_type) * 8; + BrigType16_t brig_element_type; + switch (element_size) + { + case 8: + brig_element_type + = TYPE_UNSIGNED (element_type) ? BRIG_TYPE_U8 : BRIG_TYPE_S8; + break; + case 16: + brig_element_type + = TYPE_UNSIGNED (element_type) ? BRIG_TYPE_U16 : BRIG_TYPE_S16; + break; + case 32: + brig_element_type + = TYPE_UNSIGNED (element_type) ? BRIG_TYPE_U32 : BRIG_TYPE_S32; + break; + case 64: + brig_element_type + = TYPE_UNSIGNED (element_type) ? BRIG_TYPE_U64 : BRIG_TYPE_S64; + break; + default: + gcc_unreachable (); + } + + BrigType16_t pack_type; + switch (int_size_in_bytes (tree_type) * 8) + { + case 32: + pack_type = BRIG_TYPE_PACK_32; + break; + case 64: + pack_type = BRIG_TYPE_PACK_64; + break; + case 128: + pack_type = BRIG_TYPE_PACK_128; + break; + default: + gcc_unreachable (); + } + return brig_element_type | pack_type; + } + gcc_unreachable (); +} + +/* Returns true in case the operation is a "bit level" operation, + that is, not having operand type depending semantical differences. */ + +bool +gccbrig_is_bit_operation (BrigOpcode16_t opcode) +{ + return opcode == BRIG_OPCODE_CMOV || opcode == BRIG_OPCODE_SHUFFLE + || opcode == BRIG_OPCODE_UNPACK || opcode == BRIG_OPCODE_UNPACKLO + || opcode == BRIG_OPCODE_UNPACKHI || opcode == BRIG_OPCODE_ST + || opcode == BRIG_OPCODE_PACK; +} + +/* The program scope definition can be left external within the + kernel binary which means it must be defined by the host via + HSA runtime. For these we have special treatment: + Create additional pointer indirection when accessing the variable + value from kernel code through a generated pointer + __gccbrig_ptr_variable_name. The pointer value then can be set either + within the kernel binary (in case of a later linked in definition) + or from the host. */ + +bool +gccbrig_might_be_host_defined_var_p (const BrigDirectiveVariable *brigVar) +{ + bool is_definition = brigVar->modifier & BRIG_VARIABLE_DEFINITION; + return (brigVar->segment == BRIG_SEGMENT_GLOBAL + || brigVar->segment == BRIG_SEGMENT_READONLY) && !is_definition + && brigVar->linkage == BRIG_LINKAGE_PROGRAM + && (brigVar->allocation == BRIG_ALLOCATION_PROGRAM + || brigVar->allocation == BRIG_ALLOCATION_AGENT); +} + +/* Produce a GENERIC type for the given HSA/BRIG type. Returns the element + type in case of vector instructions. */ + +tree +gccbrig_tree_type_for_hsa_type (BrigType16_t brig_type) +{ + tree tree_type = NULL_TREE; + + if (hsa_type_packed_p (brig_type)) + { + /* The element type is encoded in the bottom 5 bits. */ + BrigType16_t inner_brig_type = brig_type & BRIG_TYPE_BASE_MASK; + + unsigned full_size = gccbrig_hsa_type_bit_size (brig_type); + + if (inner_brig_type == BRIG_TYPE_F16) + return build_vector_type (gccbrig_tree_type_for_hsa_type (BRIG_TYPE_U16), + full_size / 16); + + tree inner_type = gccbrig_tree_type_for_hsa_type (inner_brig_type); + + unsigned inner_size = gccbrig_hsa_type_bit_size (inner_brig_type); + unsigned nunits = full_size / inner_size; + tree_type = build_vector_type (inner_type, nunits); + } + else + { + switch (brig_type) + { + case BRIG_TYPE_NONE: + tree_type = void_type_node; + break; + case BRIG_TYPE_B1: + tree_type = boolean_type_node; + break; + case BRIG_TYPE_S8: + case BRIG_TYPE_S16: + case BRIG_TYPE_S32: + case BRIG_TYPE_S64: + /* Ensure a fixed width integer. */ + tree_type + = build_nonstandard_integer_type + (gccbrig_hsa_type_bit_size (brig_type), false); + break; + case BRIG_TYPE_U8: + return unsigned_char_type_node; + case BRIG_TYPE_U16: + case BRIG_TYPE_U32: + case BRIG_TYPE_U64: + case BRIG_TYPE_B8: /* Handle bit vectors as unsigned ints. */ + case BRIG_TYPE_B16: + case BRIG_TYPE_B32: + case BRIG_TYPE_B64: + case BRIG_TYPE_B128: + case BRIG_TYPE_SIG32: /* Handle signals as integers for now. */ + case BRIG_TYPE_SIG64: + tree_type = build_nonstandard_integer_type + (gccbrig_hsa_type_bit_size (brig_type), true); + break; + case BRIG_TYPE_F16: + tree_type = uint16_type_node; + break; + case BRIG_TYPE_F32: + /* TODO: make sure that the alignment of the float are at least as + strict than mandated by HSA, and conform to IEEE (like mandated + by HSA). */ + tree_type = float_type_node; + break; + case BRIG_TYPE_F64: + tree_type = double_type_node; + break; + case BRIG_TYPE_SAMP: + case BRIG_TYPE_ROIMG: + case BRIG_TYPE_WOIMG: + case BRIG_TYPE_RWIMG: + { + /* Handle images and samplers as target-specific blobs of data + that should be allocated earlier on from the runtime side. + Create a void* that should be initialized to point to the blobs + by the kernel launcher. Images and samplers are accessed + via builtins that take void* as the reference. TODO: who and + how these arrays should be initialized? */ + tree void_ptr = build_pointer_type (void_type_node); + return void_ptr; + } + default: + gcc_unreachable (); + break; + } + } + + /* Drop const qualifiers. */ + return tree_type; +} diff --git a/gcc/brig/brigfrontend/brig-util.h b/gcc/brig/brigfrontend/brig-util.h new file mode 100644 index 00000000000..3786616ee4d --- /dev/null +++ b/gcc/brig/brigfrontend/brig-util.h @@ -0,0 +1,53 @@ +/* brig-util.h -- gccbrig utility functions + Copyright (C) 2016 Free Software Foundation, Inc. + Contributed by Pekka Jaaskelainen <pekka.jaaskelainen@parmance.com> + for General Processor Tech. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +#ifndef GCC_BRIG_UTIL_H +#define GCC_BRIG_UTIL_H + +#include "brig-to-generic.h" + +bool gccbrig_hsa_opcode_op_output_p (BrigOpcode16_t opcode, int opnum); + +unsigned gccbrig_hsa_type_bit_size (BrigType16_t t); + +uint64_t gccbrig_to_uint64_t (const BrigUInt64 &brig_type); + +int gccbrig_reg_size (const BrigOperandRegister *brig_reg); + +std::string gccbrig_reg_name (const BrigOperandRegister *reg); + +std::string gccbrig_type_name (BrigType16_t type); + +std::string gccbrig_segment_name (BrigSegment8_t segment); + +bool gccbrig_is_float_type (BrigType16_t type); + +bool gccbrig_is_bit_operation (BrigOpcode16_t opcode); + +BrigType16_t gccbrig_tree_type_to_hsa_type (tree tree_type); +tree gccbrig_tree_type_for_hsa_type (BrigType16_t brig_type); + +bool gccbrig_might_be_host_defined_var_p (const BrigDirectiveVariable *brigVar); + +/* From hsa.h. */ +bool hsa_type_packed_p (BrigType16_t type); + +#endif diff --git a/gcc/brig/brigfrontend/brig-variable-handler.cc b/gcc/brig/brigfrontend/brig-variable-handler.cc new file mode 100644 index 00000000000..b4a8d67438b --- /dev/null +++ b/gcc/brig/brigfrontend/brig-variable-handler.cc @@ -0,0 +1,264 @@ +/* brig-variable-handler.cc -- brig variable directive handling + Copyright (C) 2016 Free Software Foundation, Inc. + Contributed by Pekka Jaaskelainen <pekka.jaaskelainen@parmance.com> + for General Processor Tech. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation; either version 3, or (at your option) any later + version. + + GCC is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. */ + +#include "brig-code-entry-handler.h" + +#include "stringpool.h" +#include "errors.h" +#include "brig-machine.h" +#include "brig-util.h" +#include "print-tree.h" +#include "diagnostic-core.h" + +tree +brig_directive_variable_handler::build_variable + (const BrigDirectiveVariable *brigVar, tree_code var_decltype) +{ + std::string var_name = m_parent.get_mangled_name (brigVar); + + bool is_definition = brigVar->modifier & BRIG_VARIABLE_DEFINITION; + + tree name_identifier = get_identifier (var_name.c_str ()); + + tree var_decl; + tree t; + if (brigVar->type & BRIG_TYPE_ARRAY) + { + tree element_type + = gccbrig_tree_type_for_hsa_type (brigVar->type & ~BRIG_TYPE_ARRAY); + uint64_t element_count = gccbrig_to_uint64_t (brigVar->dim); + if (is_definition && element_count == 0) + fatal_error (UNKNOWN_LOCATION, "Array definition with zero elements."); + if (var_decltype == PARM_DECL) + t = build_pointer_type (element_type); + else + t = build_array_type_nelts (element_type, element_count); + } + else + { + t = gccbrig_tree_type_for_hsa_type (brigVar->type); + } + + size_t alignment = get_brig_var_alignment (brigVar); + + if (brigVar->segment == BRIG_SEGMENT_READONLY + || brigVar->segment == BRIG_SEGMENT_KERNARG + || (brigVar->modifier & BRIG_VARIABLE_CONST)) + TYPE_READONLY (t) = 1; + + TYPE_ADDR_SPACE (t) = gccbrig_get_target_addr_space_id (brigVar->segment); + + var_decl = build_decl (UNKNOWN_LOCATION, var_decltype, name_identifier, t); + + SET_DECL_ALIGN (var_decl, alignment * BITS_PER_UNIT); + + /* Force the HSA alignments. */ + DECL_USER_ALIGN (var_decl) = 1; + + TREE_USED (var_decl) = 1; + + TREE_PUBLIC (var_decl) = 1; + if (is_definition) + DECL_EXTERNAL (var_decl) = 0; + else + DECL_EXTERNAL (var_decl) = 1; /* The definition is elsewhere. */ + + if (brigVar->init != 0) + { + gcc_assert (brigVar->segment == BRIG_SEGMENT_READONLY + || brigVar->segment == BRIG_SEGMENT_GLOBAL); + + const BrigBase *cst_operand_data + = m_parent.get_brig_operand_entry (brigVar->init); + + tree initializer = NULL_TREE; + if (cst_operand_data->kind == BRIG_KIND_OPERAND_CONSTANT_BYTES) + initializer = get_tree_cst_for_hsa_operand + ((const BrigOperandConstantBytes *) cst_operand_data, t); + else + error ("variable initializers of type %x not implemented", + cst_operand_data->kind); + gcc_assert (initializer != NULL_TREE); + DECL_INITIAL (var_decl) = initializer; + } + + if (var_decltype == PARM_DECL) + { + DECL_ARG_TYPE (var_decl) = TREE_TYPE (var_decl); + DECL_EXTERNAL (var_decl) = 0; + TREE_PUBLIC (var_decl) = 0; + } + + TREE_ADDRESSABLE (var_decl) = 1; + + TREE_USED (var_decl) = 1; + DECL_NONLOCAL (var_decl) = 1; + DECL_ARTIFICIAL (var_decl) = 0; + + return var_decl; +} + +size_t +brig_directive_variable_handler::operator () (const BrigBase *base) +{ + const BrigDirectiveVariable *brigVar = (const BrigDirectiveVariable *) base; + + bool is_definition = brigVar->modifier & BRIG_VARIABLE_DEFINITION; + + size_t var_size; + tree var_type; + if (brigVar->type & BRIG_TYPE_ARRAY) + { + tree element_type + = gccbrig_tree_type_for_hsa_type (brigVar->type & ~BRIG_TYPE_ARRAY); + uint64_t element_count = gccbrig_to_uint64_t (brigVar->dim); + if (is_definition && element_count == 0) + fatal_error (UNKNOWN_LOCATION, "Array definition with zero elements."); + var_type = build_array_type_nelts (element_type, element_count); + size_t element_size = tree_to_uhwi (TYPE_SIZE (element_type)); + var_size = element_size * element_count / 8; + } + else + { + var_type = gccbrig_tree_type_for_hsa_type (brigVar->type); + var_size = tree_to_uhwi (TYPE_SIZE (var_type)) / 8; + } + + size_t alignment = get_brig_var_alignment (brigVar); + + if (m_parent.m_cf != NULL) + m_parent.m_cf->m_function_scope_vars.insert (base); + + std::string var_name = m_parent.get_mangled_name (brigVar); + if (brigVar->segment == BRIG_SEGMENT_KERNARG) + { + /* Do not create a real variable, but only a table of + offsets to the kernarg segment buffer passed as the + single argument by the kernel launcher for later + reference. Ignore kernel declarations. */ + if (m_parent.m_cf != NULL && m_parent.m_cf->m_func_decl != NULL_TREE) + m_parent.m_cf->append_kernel_arg (brigVar, var_size, alignment); + return base->byteCount; + } + else if (brigVar->segment == BRIG_SEGMENT_GROUP) + { + /* Handle group region variables similarly as kernargs: + assign offsets to the group region on the fly when + a new module scope or function scope group variable is + introduced. These offsets will be then added to the + group_base hidden pointer passed to the kernel in order to + get the flat address. */ + if (!m_parent.has_group_variable (var_name)) + m_parent.append_group_variable (var_name, var_size, alignment); + return base->byteCount; + } + else if (brigVar->segment == BRIG_SEGMENT_PRIVATE + || brigVar->segment == BRIG_SEGMENT_SPILL) + { + /* Private variables are handled like group variables, + except that their offsets are multiplied by the work-item + flat id, when accessed. */ + if (!m_parent.has_private_variable (var_name)) + m_parent.append_private_variable (var_name, var_size, alignment); + return base->byteCount; + } + else if (brigVar->segment == BRIG_SEGMENT_GLOBAL + || brigVar->segment == BRIG_SEGMENT_READONLY) + { + tree def = is_definition ? NULL_TREE : + m_parent.global_variable (var_name); + + if (!is_definition && def != NULL_TREE) + { + /* We have a definition already for this declaration. + Use the definition instead of the declaration. */ + } + else if (gccbrig_might_be_host_defined_var_p (brigVar)) + { + tree var_decl = build_variable (brigVar); + m_parent.add_host_def_var_ptr (var_name, var_decl); + } + else + { + tree var_decl = build_variable (brigVar); + /* Make all global variables program scope for now + so we can get their address from the Runtime API. */ + DECL_CONTEXT (var_decl) = NULL_TREE; + TREE_STATIC (var_decl) = 1; + m_parent.add_global_variable (var_name, var_decl); + } + } + else if (brigVar->segment == BRIG_SEGMENT_ARG) + { + + if (m_parent.m_cf->m_generating_arg_block) + { + tree var_decl = build_variable (brigVar); + tree bind_expr = m_parent.m_cf->m_current_bind_expr; + + DECL_CONTEXT (var_decl) = m_parent.m_cf->m_func_decl; + DECL_CHAIN (var_decl) = BIND_EXPR_VARS (bind_expr); + BIND_EXPR_VARS (bind_expr) = var_decl; + TREE_PUBLIC (var_decl) = 0; + + m_parent.m_cf->add_arg_variable (brigVar, var_decl); + } + else + { + /* Must be an incoming function argument which has + been parsed in brig-function-handler.cc. No + need to generate anything here. */ + } + } + else + gcc_unreachable (); + + return base->byteCount; +} + +/* Returns the alignment for the given BRIG variable. In case the variable + explicitly defines alignment and its larger than the natural alignment, + returns it instead of the natural one. */ + +size_t +brig_directive_variable_handler::get_brig_var_alignment +(const BrigDirectiveVariable *brigVar) +{ + + size_t defined_alignment + = brigVar->align == BRIG_ALIGNMENT_NONE ? 0 : 1 << (brigVar->align - 1); + size_t natural_alignment; + if (brigVar->type & BRIG_TYPE_ARRAY) + { + tree element_type + = gccbrig_tree_type_for_hsa_type (brigVar->type & ~BRIG_TYPE_ARRAY); + size_t element_size = tree_to_uhwi (TYPE_SIZE (element_type)); + natural_alignment = element_size / BITS_PER_UNIT; + } + else + { + tree t = gccbrig_tree_type_for_hsa_type (brigVar->type); + natural_alignment = tree_to_uhwi (TYPE_SIZE (t)) / BITS_PER_UNIT; + } + + return natural_alignment > defined_alignment + ? natural_alignment : defined_alignment; +} diff --git a/gcc/brig/brigfrontend/phsa.h b/gcc/brig/brigfrontend/phsa.h new file mode 100644 index 00000000000..00e0a7cd4c6 --- /dev/null +++ b/gcc/brig/brigfrontend/phsa.h @@ -0,0 +1,69 @@ +/* phsa.h -- interfacing between the gcc BRIG FE and the phsa runtime + Copyright (C) 2016 Free Software Foundation, Inc. + Contributed by Pekka Jaaskelainen <pekka.jaaskelainen@parmance.com> + for General Processor Tech. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +#ifndef PHSA_H +#define PHSA_H + +#include <stdint.h> + +/* This struct is used to pass information from the BRIG FE to the + runtime of the finalizer kernel, its control directives etc. + The data is passed raw in a special ELF section named + phsa.kerneldesc.kernel_function_name. */ + +typedef struct __attribute__((__packed__)) +{ + /* Set to 1 in case the function is a kernel. */ + uint8_t is_kernel; + /* The size of the group segment used by the kernel. */ + uint32_t group_segment_size; + /* Size of the private segment used by a single work-item. */ + uint32_t private_segment_size; + /* Total size of the kernel arguments. */ + uint32_t kernarg_segment_size; + /* Maximum alignment of a kernel argument variable. */ + uint16_t kernarg_max_align; + /* Maximum size (in bytes) of dynamic group memory. */ + uint32_t max_dynamic_group_size; + /* Max number of work-items used to launch the kernel. */ + uint64_t max_flat_grid_size; + /* Max number of work-items in a work-group used to launch the kernel. */ + uint32_t max_flat_workgroup_size; + /* The grid size required by the kernel. */ + uint64_t required_grid_size[3]; + /* The work group size required by the kernel. */ + uint32_t required_workgroup_size[3]; + /* The number of dimensions required by the kernel. */ + uint8_t required_dim; + +} phsa_descriptor; + +/* The prefix to use in the ELF section containing descriptor for + a function. */ +#define PHSA_DESC_SECTION_PREFIX "phsa.desc." +#define PHSA_HOST_DEF_PTR_PREFIX "__phsa.host_def." + +/* The frontend error messages are parsed by the host runtime, known + prefix strings are used to separate the different runtime error + codes. */ +#define PHSA_ERROR_PREFIX_INCOMPATIBLE_MODULE "Incompatible module:" + +#endif |