diff options
46 files changed, 3234 insertions, 29 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index f572df4ddcc..2b8ce47490c 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,69 @@ +2013-11-27 Aldy Hernandez <aldyh@redhat.com> + Jakub Jelinek <jakub@redhat.com> + + * cgraph.h (enum cgraph_simd_clone_arg_type): New. + (struct cgraph_simd_clone_arg, struct cgraph_simd_clone): New. + (struct cgraph_node): Add simdclone and simd_clones fields. + * config/i386/i386.c (ix86_simd_clone_compute_vecsize_and_simdlen, + ix86_simd_clone_adjust, ix86_simd_clone_usable): New functions. + (TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN, + TARGET_SIMD_CLONE_ADJUST, TARGET_SIMD_CLONE_USABLE): Define. + * doc/tm.texi.in (TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN, + TARGET_SIMD_CLONE_ADJUST, TARGET_SIMD_CLONE_USABLE): Add. + * doc/tm.texi: Regenerated. + * ggc.h (ggc_alloc_cleared_simd_clone_stat): New function. + * ipa-cp.c (determine_versionability): Fail if "omp declare simd" + attribute is present. + * omp-low.c: Include pretty-print.h, ipa-prop.h and tree-eh.h. + (simd_clone_vector_of_formal_parm_types): New function. + (simd_clone_struct_alloc, simd_clone_struct_copy, + simd_clone_vector_of_formal_parm_types, simd_clone_clauses_extract, + simd_clone_compute_base_data_type, simd_clone_mangle, + simd_clone_create, simd_clone_adjust_return_type, + create_tmp_simd_array, simd_clone_adjust_argument_types, + simd_clone_init_simd_arrays): New functions. + (struct modify_stmt_info): New type. + (ipa_simd_modify_stmt_ops, ipa_simd_modify_function_body, + simd_clone_adjust, expand_simd_clones, ipa_omp_simd_clone): New + functions. + (pass_data_omp_simd_clone): New variable. + (pass_omp_simd_clone): New class. + (make_pass_omp_simd_clone): New function. + * passes.def (pass_omp_simd_clone): New. + * target.def (TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN, + TARGET_SIMD_CLONE_ADJUST, TARGET_SIMD_CLONE_USABLE): New target + hooks. + * target.h (struct cgraph_node, struct cgraph_simd_node): Declare. + * tree-core.h (OMP_CLAUSE_LINEAR_VARIABLE_STRIDE): Document. + * tree.h (OMP_CLAUSE_LINEAR_VARIABLE_STRIDE): Define. + * tree-pass.h (make_pass_omp_simd_clone): New prototype. + * tree-vect-data-refs.c: Include cgraph.h. + (vect_analyze_data_refs): Inline by hand find_data_references_in_loop + and find_data_references_in_bb, if find_data_references_in_stmt + fails, still allow calls to #pragma omp declare simd functions + in #pragma omp simd loops unless they contain data references among + the call arguments or in lhs. + * tree-vect-loop.c (vect_determine_vectorization_factor): Handle + calls with no lhs. + (vect_transform_loop): Allow NULL STMT_VINFO_VECTYPE for calls without + lhs. + * tree-vectorizer.h (enum stmt_vec_info_type): Add + call_simd_clone_vec_info_type. + (struct _stmt_vec_info): Add simd_clone_fndecl field. + (STMT_VINFO_SIMD_CLONE_FNDECL): Define. + * tree-vect-stmts.c: Include tree-ssa-loop.h, + tree-scalar-evolution.h and cgraph.h. + (vectorizable_call): Handle calls without lhs. Assert + !stmt_can_throw_internal instead of failing for it. Don't update + EH stuff. + (struct simd_call_arg_info): New. + (vectorizable_simd_clone_call): New function. + (vect_transform_stmt): Call it. + (vect_analyze_stmt): Likewise. Allow NULL STMT_VINFO_VECTYPE for + calls without lhs. + * ipa-prop.c (ipa_add_new_function): Only call ipa_analyze_node + if cgraph_function_with_gimple_body_p is true. + 2013-11-27 Tom de Vries <tom@codesourcery.com> Marc Glisse <marc.glisse@inria.fr> diff --git a/gcc/c/ChangeLog b/gcc/c/ChangeLog index 1b7af32a0cc..397e5f69979 100644 --- a/gcc/c/ChangeLog +++ b/gcc/c/ChangeLog @@ -1,3 +1,9 @@ +2013-11-27 Aldy Hernandez <aldyh@redhat.com> + Jakub Jelinek <jakub@redhat.com> + + * c-decl.c (c_builtin_function_ext_scope): Avoid binding if + external_scope is NULL. + 2013-11-27 Tom de Vries <tom@codesourcery.com> Marc Glisse <marc.glisse@inria.fr> diff --git a/gcc/c/c-decl.c b/gcc/c/c-decl.c index 1d4f1af41a7..c019a3eec32 100644 --- a/gcc/c/c-decl.c +++ b/gcc/c/c-decl.c @@ -3646,8 +3646,9 @@ c_builtin_function_ext_scope (tree decl) const char *name = IDENTIFIER_POINTER (id); C_DECL_BUILTIN_PROTOTYPE (decl) = prototype_p (type); - bind (id, decl, external_scope, /*invisible=*/false, /*nested=*/false, - UNKNOWN_LOCATION); + if (external_scope) + bind (id, decl, external_scope, /*invisible=*/false, /*nested=*/false, + UNKNOWN_LOCATION); /* Builtins in the implementation namespace are made visible without needing to be explicitly declared. See push_file_scope. */ diff --git a/gcc/cgraph.h b/gcc/cgraph.h index c6b35f9cfc4..547950498d0 100644 --- a/gcc/cgraph.h +++ b/gcc/cgraph.h @@ -256,6 +256,99 @@ struct GTY(()) cgraph_clone_info bitmap combined_args_to_skip; }; +enum cgraph_simd_clone_arg_type +{ + SIMD_CLONE_ARG_TYPE_VECTOR, + SIMD_CLONE_ARG_TYPE_UNIFORM, + SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP, + SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP, + SIMD_CLONE_ARG_TYPE_MASK +}; + +/* Function arguments in the original function of a SIMD clone. + Supplementary data for `struct simd_clone'. */ + +struct GTY(()) cgraph_simd_clone_arg { + /* Original function argument as it originally existed in + DECL_ARGUMENTS. */ + tree orig_arg; + + /* orig_arg's function (or for extern functions type from + TYPE_ARG_TYPES). */ + tree orig_type; + + /* If argument is a vector, this holds the vector version of + orig_arg that after adjusting the argument types will live in + DECL_ARGUMENTS. Otherwise, this is NULL. + + This basically holds: + vector(simdlen) __typeof__(orig_arg) new_arg. */ + tree vector_arg; + + /* vector_arg's type (or for extern functions new vector type. */ + tree vector_type; + + /* If argument is a vector, this holds the array where the simd + argument is held while executing the simd clone function. This + is a local variable in the cloned function. Its content is + copied from vector_arg upon entry to the clone. + + This basically holds: + __typeof__(orig_arg) simd_array[simdlen]. */ + tree simd_array; + + /* A SIMD clone's argument can be either linear (constant or + variable), uniform, or vector. */ + enum cgraph_simd_clone_arg_type arg_type; + + /* For arg_type SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP this is + the constant linear step, if arg_type is + SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP, this is index of + the uniform argument holding the step, otherwise 0. */ + HOST_WIDE_INT linear_step; + + /* Variable alignment if available, otherwise 0. */ + unsigned int alignment; +}; + +/* Specific data for a SIMD function clone. */ + +struct GTY(()) cgraph_simd_clone { + /* Number of words in the SIMD lane associated with this clone. */ + unsigned int simdlen; + + /* Number of annotated function arguments in `args'. This is + usually the number of named arguments in FNDECL. */ + unsigned int nargs; + + /* Max hardware vector size in bits for integral vectors. */ + unsigned int vecsize_int; + + /* Max hardware vector size in bits for floating point vectors. */ + unsigned int vecsize_float; + + /* The mangling character for a given vector size. This is is used + to determine the ISA mangling bit as specified in the Intel + Vector ABI. */ + unsigned char vecsize_mangle; + + /* True if this is the masked, in-branch version of the clone, + otherwise false. */ + unsigned int inbranch : 1; + + /* True if this is a Cilk Plus variant. */ + unsigned int cilk_elemental : 1; + + /* Doubly linked list of SIMD clones. */ + struct cgraph_node *prev_clone, *next_clone; + + /* Original cgraph node the SIMD clones were created for. */ + struct cgraph_node *origin; + + /* Annotated function arguments for the original function. */ + struct cgraph_simd_clone_arg GTY((length ("%h.nargs"))) args[1]; +}; + /* The cgraph data structure. Each function decl has assigned cgraph_node listing callees and callers. */ @@ -284,6 +377,12 @@ public: /* Declaration node used to be clone of. */ tree former_clone_of; + /* If this is a SIMD clone, this points to the SIMD specific + information for it. */ + struct cgraph_simd_clone *simdclone; + /* If this function has SIMD clones, this points to the first clone. */ + struct cgraph_node *simd_clones; + /* Interprocedural passes scheduled to have their transform functions applied next time we execute local pass on them. We maintain it per-function in order to allow IPA passes to introduce new functions. */ diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index fd4bb671ad9..6d2fd69350d 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -43690,6 +43690,184 @@ ix86_memmodel_check (unsigned HOST_WIDE_INT val) return val; } +/* Set CLONEI->vecsize_mangle, CLONEI->vecsize_int, + CLONEI->vecsize_float and if CLONEI->simdlen is 0, also + CLONEI->simdlen. Return 0 if SIMD clones shouldn't be emitted, + or number of vecsize_mangle variants that should be emitted. */ + +static int +ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node, + struct cgraph_simd_clone *clonei, + tree base_type, int num) +{ + int ret = 1; + + if (clonei->simdlen + && (clonei->simdlen < 2 + || clonei->simdlen > 16 + || (clonei->simdlen & (clonei->simdlen - 1)) != 0)) + { + warning_at (DECL_SOURCE_LOCATION (node->decl), 0, + "unsupported simdlen %d\n", clonei->simdlen); + return 0; + } + + tree ret_type = TREE_TYPE (TREE_TYPE (node->decl)); + if (TREE_CODE (ret_type) != VOID_TYPE) + switch (TYPE_MODE (ret_type)) + { + case QImode: + case HImode: + case SImode: + case DImode: + case SFmode: + case DFmode: + /* case SCmode: */ + /* case DCmode: */ + break; + default: + warning_at (DECL_SOURCE_LOCATION (node->decl), 0, + "unsupported return type %qT for simd\n", ret_type); + return 0; + } + + tree t; + int i; + + for (t = DECL_ARGUMENTS (node->decl), i = 0; t; t = DECL_CHAIN (t), i++) + /* FIXME: Shouldn't we allow such arguments if they are uniform? */ + switch (TYPE_MODE (TREE_TYPE (t))) + { + case QImode: + case HImode: + case SImode: + case DImode: + case SFmode: + case DFmode: + /* case SCmode: */ + /* case DCmode: */ + break; + default: + warning_at (DECL_SOURCE_LOCATION (node->decl), 0, + "unsupported argument type %qT for simd\n", TREE_TYPE (t)); + return 0; + } + + if (clonei->cilk_elemental) + { + /* Parse here processor clause. If not present, default to 'b'. */ + clonei->vecsize_mangle = 'b'; + } + else if (!TREE_PUBLIC (node->decl)) + { + /* If the function isn't exported, we can pick up just one ISA + for the clones. */ + if (TARGET_AVX2) + clonei->vecsize_mangle = 'd'; + else if (TARGET_AVX) + clonei->vecsize_mangle = 'c'; + else + clonei->vecsize_mangle = 'b'; + ret = 1; + } + else + { + clonei->vecsize_mangle = "bcd"[num]; + ret = 3; + } + switch (clonei->vecsize_mangle) + { + case 'b': + clonei->vecsize_int = 128; + clonei->vecsize_float = 128; + break; + case 'c': + clonei->vecsize_int = 128; + clonei->vecsize_float = 256; + break; + case 'd': + clonei->vecsize_int = 256; + clonei->vecsize_float = 256; + break; + } + if (clonei->simdlen == 0) + { + if (SCALAR_INT_MODE_P (TYPE_MODE (base_type))) + clonei->simdlen = clonei->vecsize_int; + else + clonei->simdlen = clonei->vecsize_float; + clonei->simdlen /= GET_MODE_BITSIZE (TYPE_MODE (base_type)); + if (clonei->simdlen > 16) + clonei->simdlen = 16; + } + return ret; +} + +/* Add target attribute to SIMD clone NODE if needed. */ + +static void +ix86_simd_clone_adjust (struct cgraph_node *node) +{ + const char *str = NULL; + gcc_assert (node->decl == cfun->decl); + switch (node->simdclone->vecsize_mangle) + { + case 'b': + if (!TARGET_SSE2) + str = "sse2"; + break; + case 'c': + if (!TARGET_AVX) + str = "avx"; + break; + case 'd': + if (!TARGET_AVX2) + str = "avx2"; + break; + default: + gcc_unreachable (); + } + if (str == NULL) + return; + push_cfun (NULL); + tree args = build_tree_list (NULL_TREE, build_string (strlen (str), str)); + bool ok = ix86_valid_target_attribute_p (node->decl, NULL, args, 0); + gcc_assert (ok); + pop_cfun (); + ix86_previous_fndecl = NULL_TREE; + ix86_set_current_function (node->decl); +} + +/* If SIMD clone NODE can't be used in a vectorized loop + in current function, return -1, otherwise return a badness of using it + (0 if it is most desirable from vecsize_mangle point of view, 1 + slightly less desirable, etc.). */ + +static int +ix86_simd_clone_usable (struct cgraph_node *node) +{ + switch (node->simdclone->vecsize_mangle) + { + case 'b': + if (!TARGET_SSE2) + return -1; + if (!TARGET_AVX) + return 0; + return TARGET_AVX2 ? 2 : 1; + case 'c': + if (!TARGET_AVX) + return -1; + return TARGET_AVX2 ? 1 : 0; + break; + case 'd': + if (!TARGET_AVX2) + return -1; + return 0; + default: + gcc_unreachable (); + } +} + /* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P. */ static bool @@ -44178,6 +44356,18 @@ ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update) #undef TARGET_SPILL_CLASS #define TARGET_SPILL_CLASS ix86_spill_class +#undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN +#define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \ + ix86_simd_clone_compute_vecsize_and_simdlen + +#undef TARGET_SIMD_CLONE_ADJUST +#define TARGET_SIMD_CLONE_ADJUST \ + ix86_simd_clone_adjust + +#undef TARGET_SIMD_CLONE_USABLE +#define TARGET_SIMD_CLONE_USABLE \ + ix86_simd_clone_usable + #undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P #define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \ ix86_float_exceptions_rounding_supported_p diff --git a/gcc/cp/ChangeLog b/gcc/cp/ChangeLog index 4d4e0e4b22f..c45ddbec7bd 100644 --- a/gcc/cp/ChangeLog +++ b/gcc/cp/ChangeLog @@ -1,3 +1,9 @@ +2013-11-27 Aldy Hernandez <aldyh@redhat.com> + Jakub Jelinek <jakub@redhat.com> + + * semantics.c (finish_omp_clauses): For #pragma omp declare simd + linear clause step call maybe_constant_value. + 2013-11-27 Tom de Vries <tom@codesourcery.com> Marc Glisse <marc.glisse@inria.fr> diff --git a/gcc/cp/semantics.c b/gcc/cp/semantics.c index 083c4f46f28..e72a9c2f169 100644 --- a/gcc/cp/semantics.c +++ b/gcc/cp/semantics.c @@ -5202,6 +5202,8 @@ finish_omp_clauses (tree clauses) t = mark_rvalue_use (t); if (!processing_template_decl) { + if (TREE_CODE (OMP_CLAUSE_DECL (c)) == PARM_DECL) + t = maybe_constant_value (t); t = fold_build_cleanup_point_expr (TREE_TYPE (t), t); if (TREE_CODE (TREE_TYPE (OMP_CLAUSE_DECL (c))) == POINTER_TYPE) diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi index 925d93f6c26..966576fe721 100644 --- a/gcc/doc/tm.texi +++ b/gcc/doc/tm.texi @@ -5818,6 +5818,26 @@ The default is @code{NULL_TREE} which means to not vectorize gather loads. @end deftypefn +@deftypefn {Target Hook} int TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN (struct cgraph_node *@var{}, struct cgraph_simd_clone *@var{}, @var{tree}, @var{int}) +This hook should set @var{vecsize_mangle}, @var{vecsize_int}, @var{vecsize_float} +fields in @var{simd_clone} structure pointed by @var{clone_info} argument and also +@var{simdlen} field if it was previously 0. +The hook should return 0 if SIMD clones shouldn't be emitted, +or number of @var{vecsize_mangle} variants that should be emitted. +@end deftypefn + +@deftypefn {Target Hook} void TARGET_SIMD_CLONE_ADJUST (struct cgraph_node *@var{}) +This hook should add implicit @code{attribute(target("..."))} attribute +to SIMD clone @var{node} if needed. +@end deftypefn + +@deftypefn {Target Hook} int TARGET_SIMD_CLONE_USABLE (struct cgraph_node *@var{}) +This hook should return -1 if SIMD clone @var{node} shouldn't be used +in vectorized loops in current function, or non-negative number if it is +usable. In that case, the smaller the number is, the more desirable it is +to use it. +@end deftypefn + @node Anchored Addresses @section Anchored Addresses @cindex anchored addresses diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in index edca60089c2..63f4edb3a43 100644 --- a/gcc/doc/tm.texi.in +++ b/gcc/doc/tm.texi.in @@ -4422,6 +4422,12 @@ address; but often a machine-dependent strategy can generate better code. @hook TARGET_VECTORIZE_BUILTIN_GATHER +@hook TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN + +@hook TARGET_SIMD_CLONE_ADJUST + +@hook TARGET_SIMD_CLONE_USABLE + @node Anchored Addresses @section Anchored Addresses @cindex anchored addresses diff --git a/gcc/ggc.h b/gcc/ggc.h index bb8f939513d..f9033445270 100644 --- a/gcc/ggc.h +++ b/gcc/ggc.h @@ -276,4 +276,11 @@ ggc_alloc_cleared_gimple_statement_stat (size_t s MEM_STAT_DECL) ggc_internal_cleared_alloc_stat (s PASS_MEM_STAT); } +static inline struct simd_clone * +ggc_alloc_cleared_simd_clone_stat (size_t s MEM_STAT_DECL) +{ + return (struct simd_clone *) + ggc_internal_cleared_alloc_stat (s PASS_MEM_STAT); +} + #endif diff --git a/gcc/ipa-cp.c b/gcc/ipa-cp.c index c1634a3c69c..6fd7e9cfb95 100644 --- a/gcc/ipa-cp.c +++ b/gcc/ipa-cp.c @@ -430,6 +430,13 @@ determine_versionability (struct cgraph_node *node) reason = "not a tree_versionable_function"; else if (cgraph_function_body_availability (node) <= AVAIL_OVERWRITABLE) reason = "insufficient body availability"; + else if (lookup_attribute ("omp declare simd", DECL_ATTRIBUTES (node->decl))) + { + /* Ideally we should clone the SIMD clones themselves and create + vector copies of them, so IPA-cp and SIMD clones can happily + coexist, but that may not be worth the effort. */ + reason = "function has SIMD clones"; + } if (reason && dump_file && !node->alias && !node->thunk.thunk_p) fprintf (dump_file, "Function %s/%i is not versionable, reason: %s.\n", diff --git a/gcc/ipa-prop.c b/gcc/ipa-prop.c index 6bdb0dfc735..2254d0de421 100644 --- a/gcc/ipa-prop.c +++ b/gcc/ipa-prop.c @@ -3217,7 +3217,8 @@ ipa_node_duplication_hook (struct cgraph_node *src, struct cgraph_node *dst, static void ipa_add_new_function (struct cgraph_node *node, void *data ATTRIBUTE_UNUSED) { - ipa_analyze_node (node); + if (cgraph_function_with_gimple_body_p (node)) + ipa_analyze_node (node); } /* Register our cgraph hooks if they are not already there. */ diff --git a/gcc/ipa.c b/gcc/ipa.c index 70b1d01873f..042c501ae88 100644 --- a/gcc/ipa.c +++ b/gcc/ipa.c @@ -426,6 +426,19 @@ symtab_remove_unreachable_nodes (bool before_inlining_p, FILE *file) enqueue_node (cnode, &first, reachable); } } + + } + /* If any reachable function has simd clones, mark them as + reachable as well. */ + if (cnode->simd_clones) + { + cgraph_node *next; + for (next = cnode->simd_clones; + next; + next = next->simdclone->next_clone) + if (in_boundary_p + || !pointer_set_insert (reachable, next)) + enqueue_node (next, &first, reachable); } } /* When we see constructor of external variable, keep referred nodes in the diff --git a/gcc/omp-low.c b/gcc/omp-low.c index b980825defb..71dc8118734 100644 --- a/gcc/omp-low.c +++ b/gcc/omp-low.c @@ -67,7 +67,10 @@ along with GCC; see the file COPYING3. If not see #include "omp-low.h" #include "gimple-low.h" #include "tree-cfgcleanup.h" +#include "pretty-print.h" +#include "ipa-prop.h" #include "tree-nested.h" +#include "tree-eh.h" /* Lowering of OpenMP parallel and workshare constructs proceeds in two @@ -10615,5 +10618,1163 @@ make_pass_diagnose_omp_blocks (gcc::context *ctxt) { return new pass_diagnose_omp_blocks (ctxt); } + +/* SIMD clone supporting code. */ + +/* Allocate a fresh `simd_clone' and return it. NARGS is the number + of arguments to reserve space for. */ + +static struct cgraph_simd_clone * +simd_clone_struct_alloc (int nargs) +{ + struct cgraph_simd_clone *clone_info; + size_t len = (sizeof (struct cgraph_simd_clone) + + nargs * sizeof (struct cgraph_simd_clone_arg)); + clone_info = (struct cgraph_simd_clone *) + ggc_internal_cleared_alloc_stat (len PASS_MEM_STAT); + return clone_info; +} + +/* Make a copy of the `struct cgraph_simd_clone' in FROM to TO. */ + +static inline void +simd_clone_struct_copy (struct cgraph_simd_clone *to, + struct cgraph_simd_clone *from) +{ + memcpy (to, from, (sizeof (struct cgraph_simd_clone) + + from->nargs * sizeof (struct cgraph_simd_clone_arg))); +} + +/* Return vector of parameter types of function FNDECL. This uses + TYPE_ARG_TYPES if available, otherwise falls back to types of + DECL_ARGUMENTS types. */ + +vec<tree> +simd_clone_vector_of_formal_parm_types (tree fndecl) +{ + if (TYPE_ARG_TYPES (TREE_TYPE (fndecl))) + return ipa_get_vector_of_formal_parm_types (TREE_TYPE (fndecl)); + vec<tree> args = ipa_get_vector_of_formal_parms (fndecl); + unsigned int i; + tree arg; + FOR_EACH_VEC_ELT (args, i, arg) + args[i] = TREE_TYPE (args[i]); + return args; +} + +/* Given a simd function in NODE, extract the simd specific + information from the OMP clauses passed in CLAUSES, and return + the struct cgraph_simd_clone * if it should be cloned. *INBRANCH_SPECIFIED + is set to TRUE if the `inbranch' or `notinbranch' clause specified, + otherwise set to FALSE. */ + +static struct cgraph_simd_clone * +simd_clone_clauses_extract (struct cgraph_node *node, tree clauses, + bool *inbranch_specified) +{ + vec<tree> args = simd_clone_vector_of_formal_parm_types (node->decl); + tree t; + int n; + *inbranch_specified = false; + + n = args.length (); + if (n > 0 && args.last () == void_type_node) + n--; + + /* To distinguish from an OpenMP simd clone, Cilk Plus functions to + be cloned have a distinctive artificial label in addition to "omp + declare simd". */ + bool cilk_clone + = (flag_enable_cilkplus + && lookup_attribute ("cilk plus elemental", + DECL_ATTRIBUTES (node->decl))); + + /* Allocate one more than needed just in case this is an in-branch + clone which will require a mask argument. */ + struct cgraph_simd_clone *clone_info = simd_clone_struct_alloc (n + 1); + clone_info->nargs = n; + clone_info->cilk_elemental = cilk_clone; + + if (!clauses) + { + args.release (); + return clone_info; + } + clauses = TREE_VALUE (clauses); + if (!clauses || TREE_CODE (clauses) != OMP_CLAUSE) + return clone_info; + + for (t = clauses; t; t = OMP_CLAUSE_CHAIN (t)) + { + switch (OMP_CLAUSE_CODE (t)) + { + case OMP_CLAUSE_INBRANCH: + clone_info->inbranch = 1; + *inbranch_specified = true; + break; + case OMP_CLAUSE_NOTINBRANCH: + clone_info->inbranch = 0; + *inbranch_specified = true; + break; + case OMP_CLAUSE_SIMDLEN: + clone_info->simdlen + = TREE_INT_CST_LOW (OMP_CLAUSE_SIMDLEN_EXPR (t)); + break; + case OMP_CLAUSE_LINEAR: + { + tree decl = OMP_CLAUSE_DECL (t); + tree step = OMP_CLAUSE_LINEAR_STEP (t); + int argno = TREE_INT_CST_LOW (decl); + if (OMP_CLAUSE_LINEAR_VARIABLE_STRIDE (t)) + { + clone_info->args[argno].arg_type + = SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP; + clone_info->args[argno].linear_step = tree_to_shwi (step); + gcc_assert (clone_info->args[argno].linear_step >= 0 + && clone_info->args[argno].linear_step < n); + } + else + { + if (POINTER_TYPE_P (args[argno])) + step = fold_convert (ssizetype, step); + if (!tree_fits_shwi_p (step)) + { + warning_at (OMP_CLAUSE_LOCATION (t), 0, + "ignoring large linear step"); + args.release (); + return NULL; + } + else if (integer_zerop (step)) + { + warning_at (OMP_CLAUSE_LOCATION (t), 0, + "ignoring zero linear step"); + args.release (); + return NULL; + } + else + { + clone_info->args[argno].arg_type + = SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP; + clone_info->args[argno].linear_step = tree_to_shwi (step); + } + } + break; + } + case OMP_CLAUSE_UNIFORM: + { + tree decl = OMP_CLAUSE_DECL (t); + int argno = tree_to_uhwi (decl); + clone_info->args[argno].arg_type + = SIMD_CLONE_ARG_TYPE_UNIFORM; + break; + } + case OMP_CLAUSE_ALIGNED: + { + tree decl = OMP_CLAUSE_DECL (t); + int argno = tree_to_uhwi (decl); + clone_info->args[argno].alignment + = TREE_INT_CST_LOW (OMP_CLAUSE_ALIGNED_ALIGNMENT (t)); + break; + } + default: + break; + } + } + args.release (); + return clone_info; +} + +/* Given a SIMD clone in NODE, calculate the characteristic data + type and return the coresponding type. The characteristic data + type is computed as described in the Intel Vector ABI. */ + +static tree +simd_clone_compute_base_data_type (struct cgraph_node *node, + struct cgraph_simd_clone *clone_info) +{ + tree type = integer_type_node; + tree fndecl = node->decl; + + /* a) For non-void function, the characteristic data type is the + return type. */ + if (TREE_CODE (TREE_TYPE (TREE_TYPE (fndecl))) != VOID_TYPE) + type = TREE_TYPE (TREE_TYPE (fndecl)); + + /* b) If the function has any non-uniform, non-linear parameters, + then the characteristic data type is the type of the first + such parameter. */ + else + { + vec<tree> map = simd_clone_vector_of_formal_parm_types (fndecl); + for (unsigned int i = 0; i < clone_info->nargs; ++i) + if (clone_info->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR) + { + type = map[i]; + break; + } + map.release (); + } + + /* c) If the characteristic data type determined by a) or b) above + is struct, union, or class type which is pass-by-value (except + for the type that maps to the built-in complex data type), the + characteristic data type is int. */ + if (RECORD_OR_UNION_TYPE_P (type) + && !aggregate_value_p (type, NULL) + && TREE_CODE (type) != COMPLEX_TYPE) + return integer_type_node; + + /* d) If none of the above three classes is applicable, the + characteristic data type is int. */ + + return type; + + /* e) For Intel Xeon Phi native and offload compilation, if the + resulting characteristic data type is 8-bit or 16-bit integer + data type, the characteristic data type is int. */ + /* Well, we don't handle Xeon Phi yet. */ +} + +static tree +simd_clone_mangle (struct cgraph_node *node, + struct cgraph_simd_clone *clone_info) +{ + char vecsize_mangle = clone_info->vecsize_mangle; + char mask = clone_info->inbranch ? 'M' : 'N'; + unsigned int simdlen = clone_info->simdlen; + unsigned int n; + pretty_printer pp; + + gcc_assert (vecsize_mangle && simdlen); + + pp_string (&pp, "_ZGV"); + pp_character (&pp, vecsize_mangle); + pp_character (&pp, mask); + pp_decimal_int (&pp, simdlen); + + for (n = 0; n < clone_info->nargs; ++n) + { + struct cgraph_simd_clone_arg arg = clone_info->args[n]; + + if (arg.arg_type == SIMD_CLONE_ARG_TYPE_UNIFORM) + pp_character (&pp, 'u'); + else if (arg.arg_type == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP) + { + gcc_assert (arg.linear_step != 0); + pp_character (&pp, 'l'); + if (arg.linear_step > 1) + pp_unsigned_wide_integer (&pp, arg.linear_step); + else if (arg.linear_step < 0) + { + pp_character (&pp, 'n'); + pp_unsigned_wide_integer (&pp, (-(unsigned HOST_WIDE_INT) + arg.linear_step)); + } + } + else if (arg.arg_type == SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP) + { + pp_character (&pp, 's'); + pp_unsigned_wide_integer (&pp, arg.linear_step); + } + else + pp_character (&pp, 'v'); + if (arg.alignment) + { + pp_character (&pp, 'a'); + pp_decimal_int (&pp, arg.alignment); + } + } + + pp_underscore (&pp); + pp_string (&pp, + IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (node->decl))); + const char *str = pp_formatted_text (&pp); + + /* If there already is a SIMD clone with the same mangled name, don't + add another one. This can happen e.g. for + #pragma omp declare simd + #pragma omp declare simd simdlen(8) + int foo (int, int); + if the simdlen is assumed to be 8 for the first one, etc. */ + for (struct cgraph_node *clone = node->simd_clones; clone; + clone = clone->simdclone->next_clone) + if (strcmp (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (clone->decl)), + str) == 0) + return NULL_TREE; + + return get_identifier (str); +} + +/* Create a simd clone of OLD_NODE and return it. */ + +static struct cgraph_node * +simd_clone_create (struct cgraph_node *old_node) +{ + struct cgraph_node *new_node; + if (old_node->definition) + new_node = cgraph_function_versioning (old_node, vNULL, NULL, NULL, false, + NULL, NULL, "simdclone"); + else + { + tree old_decl = old_node->decl; + tree new_decl = copy_node (old_node->decl); + DECL_NAME (new_decl) = clone_function_name (old_decl, "simdclone"); + SET_DECL_ASSEMBLER_NAME (new_decl, DECL_NAME (new_decl)); + SET_DECL_RTL (new_decl, NULL); + DECL_STATIC_CONSTRUCTOR (new_decl) = 0; + DECL_STATIC_DESTRUCTOR (new_decl) = 0; + new_node + = cgraph_copy_node_for_versioning (old_node, new_decl, vNULL, NULL); + cgraph_call_function_insertion_hooks (new_node); + } + if (new_node == NULL) + return new_node; + + TREE_PUBLIC (new_node->decl) = TREE_PUBLIC (old_node->decl); + + /* The function cgraph_function_versioning () will force the new + symbol local. Undo this, and inherit external visability from + the old node. */ + new_node->local.local = old_node->local.local; + new_node->externally_visible = old_node->externally_visible; + + return new_node; +} + +/* Adjust the return type of the given function to its appropriate + vector counterpart. Returns a simd array to be used throughout the + function as a return value. */ + +static tree +simd_clone_adjust_return_type (struct cgraph_node *node) +{ + tree fndecl = node->decl; + tree orig_rettype = TREE_TYPE (TREE_TYPE (fndecl)); + unsigned int veclen; + tree t; + + /* Adjust the function return type. */ + if (orig_rettype == void_type_node) + return NULL_TREE; + TREE_TYPE (fndecl) = build_distinct_type_copy (TREE_TYPE (fndecl)); + if (INTEGRAL_TYPE_P (TREE_TYPE (TREE_TYPE (fndecl))) + || POINTER_TYPE_P (TREE_TYPE (TREE_TYPE (fndecl)))) + veclen = node->simdclone->vecsize_int; + else + veclen = node->simdclone->vecsize_float; + veclen /= GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (TREE_TYPE (fndecl)))); + if (veclen > node->simdclone->simdlen) + veclen = node->simdclone->simdlen; + if (veclen == node->simdclone->simdlen) + TREE_TYPE (TREE_TYPE (fndecl)) + = build_vector_type (TREE_TYPE (TREE_TYPE (fndecl)), + node->simdclone->simdlen); + else + { + t = build_vector_type (TREE_TYPE (TREE_TYPE (fndecl)), veclen); + t = build_array_type_nelts (t, node->simdclone->simdlen / veclen); + TREE_TYPE (TREE_TYPE (fndecl)) = t; + } + if (!node->definition) + return NULL_TREE; + + t = DECL_RESULT (fndecl); + /* Adjust the DECL_RESULT. */ + gcc_assert (TREE_TYPE (t) != void_type_node); + TREE_TYPE (t) = TREE_TYPE (TREE_TYPE (fndecl)); + relayout_decl (t); + + tree atype = build_array_type_nelts (orig_rettype, + node->simdclone->simdlen); + if (veclen != node->simdclone->simdlen) + return build1 (VIEW_CONVERT_EXPR, atype, t); + + /* Set up a SIMD array to use as the return value. */ + tree retval = create_tmp_var_raw (atype, "retval"); + gimple_add_tmp_var (retval); + return retval; +} + +/* Each vector argument has a corresponding array to be used locally + as part of the eventual loop. Create such temporary array and + return it. + + PREFIX is the prefix to be used for the temporary. + + TYPE is the inner element type. + + SIMDLEN is the number of elements. */ + +static tree +create_tmp_simd_array (const char *prefix, tree type, int simdlen) +{ + tree atype = build_array_type_nelts (type, simdlen); + tree avar = create_tmp_var_raw (atype, prefix); + gimple_add_tmp_var (avar); + return avar; +} + +/* Modify the function argument types to their corresponding vector + counterparts if appropriate. Also, create one array for each simd + argument to be used locally when using the function arguments as + part of the loop. + + NODE is the function whose arguments are to be adjusted. + + Returns an adjustment vector that will be filled describing how the + argument types will be adjusted. */ + +static ipa_parm_adjustment_vec +simd_clone_adjust_argument_types (struct cgraph_node *node) +{ + vec<tree> args; + ipa_parm_adjustment_vec adjustments; + + if (node->definition) + args = ipa_get_vector_of_formal_parms (node->decl); + else + args = simd_clone_vector_of_formal_parm_types (node->decl); + adjustments.create (args.length ()); + unsigned i, j, veclen; + struct ipa_parm_adjustment adj; + for (i = 0; i < node->simdclone->nargs; ++i) + { + memset (&adj, 0, sizeof (adj)); + tree parm = args[i]; + tree parm_type = node->definition ? TREE_TYPE (parm) : parm; + adj.base_index = i; + adj.base = parm; + + node->simdclone->args[i].orig_arg = node->definition ? parm : NULL_TREE; + node->simdclone->args[i].orig_type = parm_type; + + if (node->simdclone->args[i].arg_type != SIMD_CLONE_ARG_TYPE_VECTOR) + { + /* No adjustment necessary for scalar arguments. */ + adj.op = IPA_PARM_OP_COPY; + } + else + { + if (INTEGRAL_TYPE_P (parm_type) || POINTER_TYPE_P (parm_type)) + veclen = node->simdclone->vecsize_int; + else + veclen = node->simdclone->vecsize_float; + veclen /= GET_MODE_BITSIZE (TYPE_MODE (parm_type)); + if (veclen > node->simdclone->simdlen) + veclen = node->simdclone->simdlen; + adj.arg_prefix = "simd"; + adj.type = build_vector_type (parm_type, veclen); + node->simdclone->args[i].vector_type = adj.type; + for (j = veclen; j < node->simdclone->simdlen; j += veclen) + { + adjustments.safe_push (adj); + if (j == veclen) + { + memset (&adj, 0, sizeof (adj)); + adj.op = IPA_PARM_OP_NEW; + adj.arg_prefix = "simd"; + adj.base_index = i; + adj.type = node->simdclone->args[i].vector_type; + } + } + + if (node->definition) + node->simdclone->args[i].simd_array + = create_tmp_simd_array (IDENTIFIER_POINTER (DECL_NAME (parm)), + parm_type, node->simdclone->simdlen); + } + adjustments.safe_push (adj); + } + + if (node->simdclone->inbranch) + { + tree base_type + = simd_clone_compute_base_data_type (node->simdclone->origin, + node->simdclone); + + memset (&adj, 0, sizeof (adj)); + adj.op = IPA_PARM_OP_NEW; + adj.arg_prefix = "mask"; + + adj.base_index = i; + if (INTEGRAL_TYPE_P (base_type) || POINTER_TYPE_P (base_type)) + veclen = node->simdclone->vecsize_int; + else + veclen = node->simdclone->vecsize_float; + veclen /= GET_MODE_BITSIZE (TYPE_MODE (base_type)); + if (veclen > node->simdclone->simdlen) + veclen = node->simdclone->simdlen; + adj.type = build_vector_type (base_type, veclen); + adjustments.safe_push (adj); + + for (j = veclen; j < node->simdclone->simdlen; j += veclen) + adjustments.safe_push (adj); + + /* We have previously allocated one extra entry for the mask. Use + it and fill it. */ + struct cgraph_simd_clone *sc = node->simdclone; + sc->nargs++; + if (node->definition) + { + sc->args[i].orig_arg + = build_decl (UNKNOWN_LOCATION, PARM_DECL, NULL, base_type); + sc->args[i].simd_array + = create_tmp_simd_array ("mask", base_type, sc->simdlen); + } + sc->args[i].orig_type = base_type; + sc->args[i].arg_type = SIMD_CLONE_ARG_TYPE_MASK; + } + + if (node->definition) + ipa_modify_formal_parameters (node->decl, adjustments); + else + { + tree new_arg_types = NULL_TREE, new_reversed; + bool last_parm_void = false; + if (args.length () > 0 && args.last () == void_type_node) + last_parm_void = true; + + gcc_assert (TYPE_ARG_TYPES (TREE_TYPE (node->decl))); + j = adjustments.length (); + for (i = 0; i < j; i++) + { + struct ipa_parm_adjustment *adj = &adjustments[i]; + tree ptype; + if (adj->op == IPA_PARM_OP_COPY) + ptype = args[adj->base_index]; + else + ptype = adj->type; + new_arg_types = tree_cons (NULL_TREE, ptype, new_arg_types); + } + new_reversed = nreverse (new_arg_types); + if (last_parm_void) + { + if (new_reversed) + TREE_CHAIN (new_arg_types) = void_list_node; + else + new_reversed = void_list_node; + } + + tree new_type = build_distinct_type_copy (TREE_TYPE (node->decl)); + TYPE_ARG_TYPES (new_type) = new_reversed; + TREE_TYPE (node->decl) = new_type; + + adjustments.release (); + } + args.release (); + return adjustments; +} + +/* Initialize and copy the function arguments in NODE to their + corresponding local simd arrays. Returns a fresh gimple_seq with + the instruction sequence generated. */ + +static gimple_seq +simd_clone_init_simd_arrays (struct cgraph_node *node, + ipa_parm_adjustment_vec adjustments) +{ + gimple_seq seq = NULL; + unsigned i = 0, j = 0, k; + + for (tree arg = DECL_ARGUMENTS (node->decl); + arg; + arg = DECL_CHAIN (arg), i++, j++) + { + if (adjustments[j].op == IPA_PARM_OP_COPY) + continue; + + node->simdclone->args[i].vector_arg = arg; + + tree array = node->simdclone->args[i].simd_array; + if (TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg)) == node->simdclone->simdlen) + { + tree ptype = build_pointer_type (TREE_TYPE (TREE_TYPE (array))); + tree ptr = build_fold_addr_expr (array); + tree t = build2 (MEM_REF, TREE_TYPE (arg), ptr, + build_int_cst (ptype, 0)); + t = build2 (MODIFY_EXPR, TREE_TYPE (t), t, arg); + gimplify_and_add (t, &seq); + } + else + { + unsigned int simdlen = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg)); + tree ptype = build_pointer_type (TREE_TYPE (TREE_TYPE (array))); + for (k = 0; k < node->simdclone->simdlen; k += simdlen) + { + tree ptr = build_fold_addr_expr (array); + int elemsize; + if (k) + { + arg = DECL_CHAIN (arg); + j++; + } + elemsize + = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (TREE_TYPE (arg)))); + tree t = build2 (MEM_REF, TREE_TYPE (arg), ptr, + build_int_cst (ptype, k * elemsize)); + t = build2 (MODIFY_EXPR, TREE_TYPE (t), t, arg); + gimplify_and_add (t, &seq); + } + } + } + return seq; +} + +/* Callback info for ipa_simd_modify_stmt_ops below. */ + +struct modify_stmt_info { + ipa_parm_adjustment_vec adjustments; + gimple stmt; + /* True if the parent statement was modified by + ipa_simd_modify_stmt_ops. */ + bool modified; +}; + +/* Callback for walk_gimple_op. + + Adjust operands from a given statement as specified in the + adjustments vector in the callback data. */ + +static tree +ipa_simd_modify_stmt_ops (tree *tp, int *walk_subtrees, void *data) +{ + struct walk_stmt_info *wi = (struct walk_stmt_info *) data; + if (!SSA_VAR_P (*tp)) + { + /* Make sure we treat subtrees as a RHS. This makes sure that + when examining the `*foo' in *foo=x, the `foo' get treated as + a use properly. */ + wi->is_lhs = false; + wi->val_only = true; + if (TYPE_P (*tp)) + *walk_subtrees = 0; + return NULL_TREE; + } + struct modify_stmt_info *info = (struct modify_stmt_info *) wi->info; + struct ipa_parm_adjustment *cand + = ipa_get_adjustment_candidate (&tp, NULL, info->adjustments, true); + if (!cand) + return NULL_TREE; + + tree t = *tp; + tree repl = make_ssa_name (TREE_TYPE (t), NULL); + + gimple stmt; + gimple_stmt_iterator gsi = gsi_for_stmt (info->stmt); + if (wi->is_lhs) + { + stmt = gimple_build_assign (unshare_expr (cand->new_decl), repl); + gsi_insert_after (&gsi, stmt, GSI_SAME_STMT); + SSA_NAME_DEF_STMT (repl) = info->stmt; + } + else + { + /* You'd think we could skip the extra SSA variable when + wi->val_only=true, but we may have `*var' which will get + replaced into `*var_array[iter]' and will likely be something + not gimple. */ + stmt = gimple_build_assign (repl, unshare_expr (cand->new_decl)); + gsi_insert_before (&gsi, stmt, GSI_SAME_STMT); + } + + if (!useless_type_conversion_p (TREE_TYPE (*tp), TREE_TYPE (repl))) + { + tree vce = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (*tp), repl); + *tp = vce; + } + else + *tp = repl; + + info->modified = true; + wi->is_lhs = false; + wi->val_only = true; + return NULL_TREE; +} + +/* Traverse the function body and perform all modifications as + described in ADJUSTMENTS. At function return, ADJUSTMENTS will be + modified such that the replacement/reduction value will now be an + offset into the corresponding simd_array. + + This function will replace all function argument uses with their + corresponding simd array elements, and ajust the return values + accordingly. */ + +static void +ipa_simd_modify_function_body (struct cgraph_node *node, + ipa_parm_adjustment_vec adjustments, + tree retval_array, tree iter) +{ + basic_block bb; + unsigned int i, j; + + /* Re-use the adjustments array, but this time use it to replace + every function argument use to an offset into the corresponding + simd_array. */ + for (i = 0, j = 0; i < node->simdclone->nargs; ++i, ++j) + { + if (!node->simdclone->args[i].vector_arg) + continue; + + tree basetype = TREE_TYPE (node->simdclone->args[i].orig_arg); + tree vectype = TREE_TYPE (node->simdclone->args[i].vector_arg); + adjustments[j].new_decl + = build4 (ARRAY_REF, + basetype, + node->simdclone->args[i].simd_array, + iter, + NULL_TREE, NULL_TREE); + if (adjustments[j].op == IPA_PARM_OP_NONE + && TYPE_VECTOR_SUBPARTS (vectype) < node->simdclone->simdlen) + j += node->simdclone->simdlen / TYPE_VECTOR_SUBPARTS (vectype) - 1; + } + + struct modify_stmt_info info; + info.adjustments = adjustments; + + FOR_EACH_BB_FN (bb, DECL_STRUCT_FUNCTION (node->decl)) + { + gimple_stmt_iterator gsi; + + gsi = gsi_start_bb (bb); + while (!gsi_end_p (gsi)) + { + gimple stmt = gsi_stmt (gsi); + info.stmt = stmt; + struct walk_stmt_info wi; + + memset (&wi, 0, sizeof (wi)); + info.modified = false; + wi.info = &info; + walk_gimple_op (stmt, ipa_simd_modify_stmt_ops, &wi); + + if (gimple_code (stmt) == GIMPLE_RETURN) + { + tree retval = gimple_return_retval (stmt); + if (!retval) + { + gsi_remove (&gsi, true); + continue; + } + + /* Replace `return foo' with `retval_array[iter] = foo'. */ + tree ref = build4 (ARRAY_REF, TREE_TYPE (retval), + retval_array, iter, NULL, NULL); + stmt = gimple_build_assign (ref, retval); + gsi_replace (&gsi, stmt, true); + info.modified = true; + } + + if (info.modified) + { + update_stmt (stmt); + if (maybe_clean_eh_stmt (stmt)) + gimple_purge_dead_eh_edges (gimple_bb (stmt)); + } + gsi_next (&gsi); + } + } +} + +/* Adjust the argument types in NODE to their appropriate vector + counterparts. */ + +static void +simd_clone_adjust (struct cgraph_node *node) +{ + push_cfun (DECL_STRUCT_FUNCTION (node->decl)); + + targetm.simd_clone.adjust (node); + + tree retval = simd_clone_adjust_return_type (node); + ipa_parm_adjustment_vec adjustments + = simd_clone_adjust_argument_types (node); + + push_gimplify_context (); + + gimple_seq seq = simd_clone_init_simd_arrays (node, adjustments); + + /* Adjust all uses of vector arguments accordingly. Adjust all + return values accordingly. */ + tree iter = create_tmp_var (unsigned_type_node, "iter"); + tree iter1 = make_ssa_name (iter, NULL); + tree iter2 = make_ssa_name (iter, NULL); + ipa_simd_modify_function_body (node, adjustments, retval, iter1); + + /* Initialize the iteration variable. */ + basic_block entry_bb = single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)); + basic_block body_bb = split_block_after_labels (entry_bb)->dest; + gimple_stmt_iterator gsi = gsi_after_labels (entry_bb); + /* Insert the SIMD array and iv initialization at function + entry. */ + gsi_insert_seq_before (&gsi, seq, GSI_NEW_STMT); + + pop_gimplify_context (NULL); + + /* Create a new BB right before the original exit BB, to hold the + iteration increment and the condition/branch. */ + basic_block orig_exit = EDGE_PRED (EXIT_BLOCK_PTR_FOR_FN (cfun), 0)->src; + basic_block incr_bb = create_empty_bb (orig_exit); + /* The succ of orig_exit was EXIT_BLOCK_PTR_FOR_FN (cfun), with an empty + flag. Set it now to be a FALLTHRU_EDGE. */ + gcc_assert (EDGE_COUNT (orig_exit->succs) == 1); + EDGE_SUCC (orig_exit, 0)->flags |= EDGE_FALLTHRU; + for (unsigned i = 0; + i < EDGE_COUNT (EXIT_BLOCK_PTR_FOR_FN (cfun)->preds); ++i) + { + edge e = EDGE_PRED (EXIT_BLOCK_PTR_FOR_FN (cfun), i); + redirect_edge_succ (e, incr_bb); + } + edge e = make_edge (incr_bb, EXIT_BLOCK_PTR_FOR_FN (cfun), 0); + e->probability = REG_BR_PROB_BASE; + gsi = gsi_last_bb (incr_bb); + gimple g = gimple_build_assign_with_ops (PLUS_EXPR, iter2, iter1, + build_int_cst (unsigned_type_node, + 1)); + gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING); + + /* Mostly annotate the loop for the vectorizer (the rest is done below). */ + struct loop *loop = alloc_loop (); + cfun->has_force_vect_loops = true; + loop->safelen = node->simdclone->simdlen; + loop->force_vect = true; + loop->header = body_bb; + add_bb_to_loop (incr_bb, loop); + + /* Branch around the body if the mask applies. */ + if (node->simdclone->inbranch) + { + gimple_stmt_iterator gsi = gsi_last_bb (loop->header); + tree mask_array + = node->simdclone->args[node->simdclone->nargs - 1].simd_array; + tree mask = make_ssa_name (TREE_TYPE (TREE_TYPE (mask_array)), NULL); + tree aref = build4 (ARRAY_REF, + TREE_TYPE (TREE_TYPE (mask_array)), + mask_array, iter1, + NULL, NULL); + g = gimple_build_assign (mask, aref); + gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING); + int bitsize = GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (aref))); + if (!INTEGRAL_TYPE_P (TREE_TYPE (aref))) + { + aref = build1 (VIEW_CONVERT_EXPR, + build_nonstandard_integer_type (bitsize, 0), mask); + mask = make_ssa_name (TREE_TYPE (aref), NULL); + g = gimple_build_assign (mask, aref); + gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING); + } + + g = gimple_build_cond (EQ_EXPR, mask, build_zero_cst (TREE_TYPE (mask)), + NULL, NULL); + gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING); + make_edge (loop->header, incr_bb, EDGE_TRUE_VALUE); + FALLTHRU_EDGE (loop->header)->flags = EDGE_FALSE_VALUE; + } + + /* Generate the condition. */ + g = gimple_build_cond (LT_EXPR, + iter2, + build_int_cst (unsigned_type_node, + node->simdclone->simdlen), + NULL, NULL); + gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING); + e = split_block (incr_bb, gsi_stmt (gsi)); + basic_block latch_bb = e->dest; + basic_block new_exit_bb = e->dest; + new_exit_bb = split_block (latch_bb, NULL)->dest; + loop->latch = latch_bb; + + redirect_edge_succ (FALLTHRU_EDGE (latch_bb), body_bb); + + make_edge (incr_bb, new_exit_bb, EDGE_FALSE_VALUE); + /* The successor of incr_bb is already pointing to latch_bb; just + change the flags. + make_edge (incr_bb, latch_bb, EDGE_TRUE_VALUE); */ + FALLTHRU_EDGE (incr_bb)->flags = EDGE_TRUE_VALUE; + + gimple phi = create_phi_node (iter1, body_bb); + edge preheader_edge = find_edge (entry_bb, body_bb); + edge latch_edge = single_succ_edge (latch_bb); + add_phi_arg (phi, build_zero_cst (unsigned_type_node), preheader_edge, + UNKNOWN_LOCATION); + add_phi_arg (phi, iter2, latch_edge, UNKNOWN_LOCATION); + + /* Generate the new return. */ + gsi = gsi_last_bb (new_exit_bb); + if (retval + && TREE_CODE (retval) == VIEW_CONVERT_EXPR + && TREE_CODE (TREE_OPERAND (retval, 0)) == RESULT_DECL) + retval = TREE_OPERAND (retval, 0); + else if (retval) + { + retval = build1 (VIEW_CONVERT_EXPR, + TREE_TYPE (TREE_TYPE (node->decl)), + retval); + retval = force_gimple_operand_gsi (&gsi, retval, true, NULL, + false, GSI_CONTINUE_LINKING); + } + g = gimple_build_return (retval); + gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING); + + /* Handle aligned clauses by replacing default defs of the aligned + uniform args with __builtin_assume_aligned (arg_N(D), alignment) + lhs. Handle linear by adding PHIs. */ + for (unsigned i = 0; i < node->simdclone->nargs; i++) + if (node->simdclone->args[i].alignment + && node->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_UNIFORM + && (node->simdclone->args[i].alignment + & (node->simdclone->args[i].alignment - 1)) == 0 + && TREE_CODE (TREE_TYPE (node->simdclone->args[i].orig_arg)) + == POINTER_TYPE) + { + unsigned int alignment = node->simdclone->args[i].alignment; + tree orig_arg = node->simdclone->args[i].orig_arg; + tree def = ssa_default_def (cfun, orig_arg); + if (!has_zero_uses (def)) + { + tree fn = builtin_decl_explicit (BUILT_IN_ASSUME_ALIGNED); + gimple_seq seq = NULL; + bool need_cvt = false; + gimple call + = gimple_build_call (fn, 2, def, size_int (alignment)); + g = call; + if (!useless_type_conversion_p (TREE_TYPE (orig_arg), + ptr_type_node)) + need_cvt = true; + tree t = make_ssa_name (need_cvt ? ptr_type_node : orig_arg, NULL); + gimple_call_set_lhs (g, t); + gimple_seq_add_stmt_without_update (&seq, g); + if (need_cvt) + { + t = make_ssa_name (orig_arg, NULL); + g = gimple_build_assign_with_ops (NOP_EXPR, t, + gimple_call_lhs (g), + NULL_TREE); + gimple_seq_add_stmt_without_update (&seq, g); + } + gsi_insert_seq_on_edge_immediate + (single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)), seq); + + entry_bb = single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)); + int freq = compute_call_stmt_bb_frequency (current_function_decl, + entry_bb); + cgraph_create_edge (node, cgraph_get_create_node (fn), + call, entry_bb->count, freq); + + imm_use_iterator iter; + use_operand_p use_p; + gimple use_stmt; + tree repl = gimple_get_lhs (g); + FOR_EACH_IMM_USE_STMT (use_stmt, iter, def) + if (is_gimple_debug (use_stmt) || use_stmt == call) + continue; + else + FOR_EACH_IMM_USE_ON_STMT (use_p, iter) + SET_USE (use_p, repl); + } + } + else if (node->simdclone->args[i].arg_type + == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP) + { + tree orig_arg = node->simdclone->args[i].orig_arg; + tree def = ssa_default_def (cfun, orig_arg); + gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (orig_arg)) + || POINTER_TYPE_P (TREE_TYPE (orig_arg))); + if (!has_zero_uses (def)) + { + iter1 = make_ssa_name (orig_arg, NULL); + iter2 = make_ssa_name (orig_arg, NULL); + phi = create_phi_node (iter1, body_bb); + add_phi_arg (phi, def, preheader_edge, UNKNOWN_LOCATION); + add_phi_arg (phi, iter2, latch_edge, UNKNOWN_LOCATION); + enum tree_code code = INTEGRAL_TYPE_P (TREE_TYPE (orig_arg)) + ? PLUS_EXPR : POINTER_PLUS_EXPR; + tree addtype = INTEGRAL_TYPE_P (TREE_TYPE (orig_arg)) + ? TREE_TYPE (orig_arg) : sizetype; + tree addcst + = build_int_cst (addtype, node->simdclone->args[i].linear_step); + g = gimple_build_assign_with_ops (code, iter2, iter1, addcst); + gsi = gsi_last_bb (incr_bb); + gsi_insert_before (&gsi, g, GSI_SAME_STMT); + + imm_use_iterator iter; + use_operand_p use_p; + gimple use_stmt; + FOR_EACH_IMM_USE_STMT (use_stmt, iter, def) + if (use_stmt == phi) + continue; + else + FOR_EACH_IMM_USE_ON_STMT (use_p, iter) + SET_USE (use_p, iter1); + } + } + + calculate_dominance_info (CDI_DOMINATORS); + add_loop (loop, loop->header->loop_father); + update_ssa (TODO_update_ssa); + + pop_cfun (); +} + +/* If the function in NODE is tagged as an elemental SIMD function, + create the appropriate SIMD clones. */ + +static void +expand_simd_clones (struct cgraph_node *node) +{ + if (lookup_attribute ("noclone", DECL_ATTRIBUTES (node->decl))) + return; + + tree attr = lookup_attribute ("omp declare simd", + DECL_ATTRIBUTES (node->decl)); + if (!attr || targetm.simd_clone.compute_vecsize_and_simdlen == NULL) + return; + /* Ignore + #pragma omp declare simd + extern int foo (); + in C, there we don't know the argument types at all. */ + if (!node->definition + && TYPE_ARG_TYPES (TREE_TYPE (node->decl)) == NULL_TREE) + return; + + do + { + /* Start with parsing the "omp declare simd" attribute(s). */ + bool inbranch_clause_specified; + struct cgraph_simd_clone *clone_info + = simd_clone_clauses_extract (node, TREE_VALUE (attr), + &inbranch_clause_specified); + if (clone_info == NULL) + continue; + + int orig_simdlen = clone_info->simdlen; + tree base_type = simd_clone_compute_base_data_type (node, clone_info); + /* The target can return 0 (no simd clones should be created), + 1 (just one ISA of simd clones should be created) or higher + count of ISA variants. In that case, clone_info is initialized + for the first ISA variant. */ + int count + = targetm.simd_clone.compute_vecsize_and_simdlen (node, clone_info, + base_type, 0); + if (count == 0) + continue; + + /* Loop over all COUNT ISA variants, and if !INBRANCH_CLAUSE_SPECIFIED, + also create one inbranch and one !inbranch clone of it. */ + for (int i = 0; i < count * 2; i++) + { + struct cgraph_simd_clone *clone = clone_info; + if (inbranch_clause_specified && (i & 1) != 0) + continue; + + if (i != 0) + { + clone = simd_clone_struct_alloc (clone_info->nargs + - clone_info->inbranch + + ((i & 1) != 0)); + simd_clone_struct_copy (clone, clone_info); + /* Undo changes targetm.simd_clone.compute_vecsize_and_simdlen + and simd_clone_adjust_argument_types did to the first + clone's info. */ + clone->nargs -= clone_info->inbranch; + clone->simdlen = orig_simdlen; + /* And call the target hook again to get the right ISA. */ + targetm.simd_clone.compute_vecsize_and_simdlen (node, clone, + base_type, + i / 2); + if ((i & 1) != 0) + clone->inbranch = 1; + } + + /* simd_clone_mangle might fail if such a clone has been created + already. */ + tree id = simd_clone_mangle (node, clone); + if (id == NULL_TREE) + continue; + + /* Only when we are sure we want to create the clone actually + clone the function (or definitions) or create another + extern FUNCTION_DECL (for prototypes without definitions). */ + struct cgraph_node *n = simd_clone_create (node); + if (n == NULL) + continue; + + n->simdclone = clone; + clone->origin = node; + clone->next_clone = NULL; + if (node->simd_clones == NULL) + { + clone->prev_clone = n; + node->simd_clones = n; + } + else + { + clone->prev_clone = node->simd_clones->simdclone->prev_clone; + clone->prev_clone->simdclone->next_clone = n; + node->simd_clones->simdclone->prev_clone = n; + } + change_decl_assembler_name (n->decl, id); + /* And finally adjust the return type, parameters and for + definitions also function body. */ + if (node->definition) + simd_clone_adjust (n); + else + { + simd_clone_adjust_return_type (n); + simd_clone_adjust_argument_types (n); + } + } + } + while ((attr = lookup_attribute ("omp declare simd", TREE_CHAIN (attr)))); +} + +/* Entry point for IPA simd clone creation pass. */ + +static unsigned int +ipa_omp_simd_clone (void) +{ + struct cgraph_node *node; + FOR_EACH_FUNCTION (node) + expand_simd_clones (node); + return 0; +} + +namespace { + +const pass_data pass_data_omp_simd_clone = +{ + SIMPLE_IPA_PASS, /* type */ + "simdclone", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + true, /* has_gate */ + true, /* has_execute */ + TV_NONE, /* tv_id */ + ( PROP_ssa | PROP_cfg ), /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + 0, /* todo_flags_finish */ +}; + +class pass_omp_simd_clone : public simple_ipa_opt_pass +{ +public: + pass_omp_simd_clone(gcc::context *ctxt) + : simple_ipa_opt_pass(pass_data_omp_simd_clone, ctxt) + {} + + /* opt_pass methods: */ + bool gate () { return flag_openmp || flag_openmp_simd + || flag_enable_cilkplus; } + unsigned int execute () { return ipa_omp_simd_clone (); } +}; + +} // anon namespace + +simple_ipa_opt_pass * +make_pass_omp_simd_clone (gcc::context *ctxt) +{ + return new pass_omp_simd_clone (ctxt); +} #include "gt-omp-low.h" diff --git a/gcc/passes.def b/gcc/passes.def index a9411fa0baa..65c00bbbb41 100644 --- a/gcc/passes.def +++ b/gcc/passes.def @@ -117,6 +117,7 @@ along with GCC; see the file COPYING3. If not see compiled unit. */ INSERT_PASSES_AFTER (all_late_ipa_passes) NEXT_PASS (pass_ipa_pta); + NEXT_PASS (pass_omp_simd_clone); TERMINATE_PASS_LIST () /* These passes are run after IPA passes on every function that is being diff --git a/gcc/target.def b/gcc/target.def index cde4fb49a07..ca1d2506c7b 100644 --- a/gcc/target.def +++ b/gcc/target.def @@ -1521,6 +1521,36 @@ hook_int_uint_mode_1) HOOK_VECTOR_END (sched) +/* Functions relating to OpenMP and Cilk Plus SIMD clones. */ +#undef HOOK_PREFIX +#define HOOK_PREFIX "TARGET_SIMD_CLONE_" +HOOK_VECTOR (TARGET_SIMD_CLONE, simd_clone) + +DEFHOOK +(compute_vecsize_and_simdlen, +"This hook should set @var{vecsize_mangle}, @var{vecsize_int}, @var{vecsize_float}\n\ +fields in @var{simd_clone} structure pointed by @var{clone_info} argument and also\n\ +@var{simdlen} field if it was previously 0.\n\ +The hook should return 0 if SIMD clones shouldn't be emitted,\n\ +or number of @var{vecsize_mangle} variants that should be emitted.", +int, (struct cgraph_node *, struct cgraph_simd_clone *, tree, int), NULL) + +DEFHOOK +(adjust, +"This hook should add implicit @code{attribute(target(\"...\"))} attribute\n\ +to SIMD clone @var{node} if needed.", +void, (struct cgraph_node *), NULL) + +DEFHOOK +(usable, +"This hook should return -1 if SIMD clone @var{node} shouldn't be used\n\ +in vectorized loops in current function, or non-negative number if it is\n\ +usable. In that case, the smaller the number is, the more desirable it is\n\ +to use it.", +int, (struct cgraph_node *), NULL) + +HOOK_VECTOR_END (simd_clone) + /* Functions relating to vectorization. */ #undef HOOK_PREFIX #define HOOK_PREFIX "TARGET_VECTORIZE_" diff --git a/gcc/target.h b/gcc/target.h index 12249ecfd5c..7c12f82af7d 100644 --- a/gcc/target.h +++ b/gcc/target.h @@ -93,6 +93,8 @@ extern bool target_default_pointer_address_modes_p (void); struct stdarg_info; struct spec_info_def; struct hard_reg_set_container; +struct cgraph_node; +struct cgraph_simd_clone; /* The struct used by the secondary_reload target hook. */ typedef struct secondary_reload_info diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 95745e10f4a..20e885ade3c 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,25 @@ +2013-11-27 Aldy Hernandez <aldyh@redhat.com> + Jakub Jelinek <jakub@redhat.com> + + * g++.dg/gomp/declare-simd-1.C (f38): Make sure + simdlen is a power of two. + * gcc.dg/gomp/simd-clones-2.c: Compile on all targets. + Remove -msse2. Adjust regexps for name mangling changes. + * gcc.dg/gomp/simd-clones-3.c: Likewise. + * gcc.dg/vect/vect-simd-clone-1.c: New test. + * gcc.dg/vect/vect-simd-clone-2.c: New test. + * gcc.dg/vect/vect-simd-clone-3.c: New test. + * gcc.dg/vect/vect-simd-clone-4.c: New test. + * gcc.dg/vect/vect-simd-clone-5.c: New test. + * gcc.dg/vect/vect-simd-clone-6.c: New test. + * gcc.dg/vect/vect-simd-clone-7.c: New test. + * gcc.dg/vect/vect-simd-clone-8.c: New test. + * gcc.dg/vect/vect-simd-clone-9.c: New test. + * gcc.dg/vect/vect-simd-clone-10.c: New test. + * gcc.dg/vect/vect-simd-clone-10.h: New file. + * gcc.dg/vect/vect-simd-clone-10a.c: New file. + * gcc.dg/vect/vect-simd-clone-11.c: New test. + 2013-11-27 Rainer Orth <ro@CeBiTec.Uni-Bielefeld.DE> * gcc.dg/cilk-plus/cilk-plus.exp: Append to ld_library_path. diff --git a/gcc/testsuite/g++.dg/gomp/declare-simd-1.C b/gcc/testsuite/g++.dg/gomp/declare-simd-1.C index e9be161948e..4b2812fb6e6 100644 --- a/gcc/testsuite/g++.dg/gomp/declare-simd-1.C +++ b/gcc/testsuite/g++.dg/gomp/declare-simd-1.C @@ -239,5 +239,5 @@ struct D void f38 (D &d) { - d.f37 <12> (6); + d.f37 <16> (6); } diff --git a/gcc/testsuite/gcc.dg/gomp/simd-clones-1.c b/gcc/testsuite/gcc.dg/gomp/simd-clones-1.c new file mode 100644 index 00000000000..486b67a637a --- /dev/null +++ b/gcc/testsuite/gcc.dg/gomp/simd-clones-1.c @@ -0,0 +1,33 @@ +/* { dg-do compile } */ +/* { dg-options "-fopenmp -fdump-tree-optimized -O3" } */ + +/* Test that functions that have SIMD clone counterparts are not + cloned by IPA-cp. For example, special_add() below has SIMD clones + created for it. However, if IPA-cp later decides to clone a + specialization of special_add(x, 666) when analyzing fillit(), we + will forever keep the vectorizer from using the SIMD versions of + special_add in a loop. + + If IPA-CP gets taught how to adjust the SIMD clones as well, this + test could be removed. */ + +#pragma omp declare simd simdlen(4) +static int __attribute__ ((noinline)) +special_add (int x, int y) +{ + if (y == 666) + return x + y + 123; + else + return x + y; +} + +void fillit(int *tot) +{ + int i; + + for (i=0; i < 10000; ++i) + tot[i] = special_add (i, 666); +} + +/* { dg-final { scan-tree-dump-not "special_add.constprop" "optimized" } } */ +/* { dg-final { cleanup-tree-dump "optimized" } } */ diff --git a/gcc/testsuite/gcc.dg/gomp/simd-clones-2.c b/gcc/testsuite/gcc.dg/gomp/simd-clones-2.c new file mode 100644 index 00000000000..030ae6cb19b --- /dev/null +++ b/gcc/testsuite/gcc.dg/gomp/simd-clones-2.c @@ -0,0 +1,26 @@ +/* { dg-options "-fopenmp -fdump-tree-optimized -O" } */ + +#pragma omp declare simd inbranch uniform(c) linear(b:66) +#pragma omp declare simd notinbranch aligned(c:32) +int addit(int a, int b, int *c) +{ + return a + b; +} + +#pragma omp declare simd uniform(a) aligned(a:32) linear(k:1) notinbranch +float setArray(float *a, float x, int k) +{ + a[k] = a[k] + x; + return a[k]; +} + +/* { dg-final { scan-tree-dump "_ZGVbN4ua32vl_setArray" "optimized" { target i?86-*-* x86_64-*-* } } } */ +/* { dg-final { scan-tree-dump "_ZGVbN4vvva32_addit" "optimized" { target i?86-*-* x86_64-*-* } } } */ +/* { dg-final { scan-tree-dump "_ZGVbM4vl66u_addit" "optimized" { target i?86-*-* x86_64-*-* } } } */ +/* { dg-final { scan-tree-dump "_ZGVcN8ua32vl_setArray" "optimized" { target i?86-*-* x86_64-*-* } } } */ +/* { dg-final { scan-tree-dump "_ZGVcN4vvva32_addit" "optimized" { target i?86-*-* x86_64-*-* } } } */ +/* { dg-final { scan-tree-dump "_ZGVcM4vl66u_addit" "optimized" { target i?86-*-* x86_64-*-* } } } */ +/* { dg-final { scan-tree-dump "_ZGVdN8ua32vl_setArray" "optimized" { target i?86-*-* x86_64-*-* } } } */ +/* { dg-final { scan-tree-dump "_ZGVdN8vvva32_addit" "optimized" { target i?86-*-* x86_64-*-* } } } */ +/* { dg-final { scan-tree-dump "_ZGVdM8vl66u_addit" "optimized" { target i?86-*-* x86_64-*-* } } } */ +/* { dg-final { cleanup-tree-dump "optimized" } } */ diff --git a/gcc/testsuite/gcc.dg/gomp/simd-clones-3.c b/gcc/testsuite/gcc.dg/gomp/simd-clones-3.c new file mode 100644 index 00000000000..98e767c3ae3 --- /dev/null +++ b/gcc/testsuite/gcc.dg/gomp/simd-clones-3.c @@ -0,0 +1,18 @@ +/* { dg-options "-fopenmp -fdump-tree-optimized -O2" } */ + +/* Test that if there is no *inbranch clauses, that both the masked and + the unmasked version are created. */ + +#pragma omp declare simd +int addit(int a, int b, int c) +{ + return a + b; +} + +/* { dg-final { scan-tree-dump "_ZGVbN4vvv_addit" "optimized" { target i?86-*-* x86_64-*-* } } } */ +/* { dg-final { scan-tree-dump "_ZGVbM4vvv_addit" "optimized" { target i?86-*-* x86_64-*-* } } } */ +/* { dg-final { scan-tree-dump "_ZGVcN4vvv_addit" "optimized" { target i?86-*-* x86_64-*-* } } } */ +/* { dg-final { scan-tree-dump "_ZGVcM4vvv_addit" "optimized" { target i?86-*-* x86_64-*-* } } } */ +/* { dg-final { scan-tree-dump "_ZGVdN8vvv_addit" "optimized" { target i?86-*-* x86_64-*-* } } } */ +/* { dg-final { scan-tree-dump "_ZGVdM8vvv_addit" "optimized" { target i?86-*-* x86_64-*-* } } } */ +/* { dg-final { cleanup-tree-dump "optimized" } } */ diff --git a/gcc/testsuite/gcc.dg/gomp/simd-clones-4.c b/gcc/testsuite/gcc.dg/gomp/simd-clones-4.c new file mode 100644 index 00000000000..893f44e690f --- /dev/null +++ b/gcc/testsuite/gcc.dg/gomp/simd-clones-4.c @@ -0,0 +1,11 @@ +/* { dg-do compile { target i?86-*-* x86_64-*-* } } */ +/* { dg-options "-fopenmp" } */ + +#pragma omp declare simd simdlen(4) notinbranch +int f2 (int a, int b) +{ + if (a > 5) + return a + b; + else + return a - b; +} diff --git a/gcc/testsuite/gcc.dg/gomp/simd-clones-5.c b/gcc/testsuite/gcc.dg/gomp/simd-clones-5.c new file mode 100644 index 00000000000..801c24f34af --- /dev/null +++ b/gcc/testsuite/gcc.dg/gomp/simd-clones-5.c @@ -0,0 +1,12 @@ +/* { dg-do compile { target i?86-*-* x86_64-*-* } } */ +/* { dg-options "-fopenmp -w" } */ + +/* ?? The -w above is to inhibit the following warning for now: + a.c:2:6: warning: AVX vector argument without AVX enabled changes + the ABI [enabled by default]. */ + +#pragma omp declare simd notinbranch simdlen(4) +void foo (int *a) +{ + *a = 555; +} diff --git a/gcc/testsuite/gcc.dg/gomp/simd-clones-6.c b/gcc/testsuite/gcc.dg/gomp/simd-clones-6.c new file mode 100644 index 00000000000..8818594bfac --- /dev/null +++ b/gcc/testsuite/gcc.dg/gomp/simd-clones-6.c @@ -0,0 +1,11 @@ +/* { dg-do compile { target i?86-*-* x86_64-*-* } } */ +/* { dg-options "-fopenmp" } */ + +/* Test that array subscripts are properly adjusted. */ + +int array[1000]; +#pragma omp declare simd notinbranch simdlen(4) +void foo (int i) +{ + array[i] = 555; +} diff --git a/gcc/testsuite/gcc.dg/gomp/simd-clones-7.c b/gcc/testsuite/gcc.dg/gomp/simd-clones-7.c new file mode 100644 index 00000000000..ef6fa113c1a --- /dev/null +++ b/gcc/testsuite/gcc.dg/gomp/simd-clones-7.c @@ -0,0 +1,16 @@ +/* { dg-do compile { target i?86-*-* x86_64-*-* } } */ +/* { dg-options "-fopenmp -w" } */ + +int array[1000]; + +#pragma omp declare simd notinbranch simdlen(4) +void foo (int *a, int b) +{ + a[b] = 555; +} + +#pragma omp declare simd notinbranch simdlen(4) +void bar (int *a) +{ + *a = 555; +} diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-1.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-1.c new file mode 100644 index 00000000000..d802dfb7e27 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-1.c @@ -0,0 +1,58 @@ +/* { dg-additional-options "-fopenmp-simd" } */ +/* { dg-additional-options "-mavx" { target avx_runtime } } */ + +#include "tree-vect.h" + +#ifndef N +#define N 1024 +#endif + +int array[N]; + +#pragma omp declare simd simdlen(4) notinbranch +#pragma omp declare simd simdlen(4) notinbranch uniform(b) linear(c:3) +#pragma omp declare simd simdlen(8) notinbranch +#pragma omp declare simd simdlen(8) notinbranch uniform(b) linear(c:3) +__attribute__((noinline)) int +foo (int a, int b, int c) +{ + if (a < 30) + return 5; + return a + b + c; +} + +__attribute__((noinline, noclone)) void +bar () +{ + int i; +#pragma omp simd + for (i = 0; i < N; ++i) + array[i] = foo (i, 123, i * 3); +} + +__attribute__((noinline, noclone)) void +baz () +{ + int i; +#pragma omp simd + for (i = 0; i < N; ++i) + array[i] = foo (i, array[i], i * 3); +} + +int +main () +{ + int i; + check_vect (); + bar (); + for (i = 0; i < N; i++) + if (array[i] != (i < 30 ? 5 : i * 4 + 123)) + abort (); + baz (); + for (i = 0; i < N; i++) + if (array[i] != (i < 30 ? 5 : i * 8 + 123)) + abort (); + return 0; +} + +/* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-10.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-10.c new file mode 100644 index 00000000000..3f29b52a8c3 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-10.c @@ -0,0 +1,83 @@ +/* { dg-additional-options "-fopenmp-simd" } */ +/* { dg-additional-options "-mavx" { target avx_runtime } } */ +/* { dg-additional-sources vect-simd-clone-10a.c } */ + +#include "tree-vect.h" + +#ifndef N +#define N 1024 +#endif + +int a[N], b[N]; +long int c[N]; +unsigned char d[N]; + +#include "vect-simd-clone-10.h" + +__attribute__((noinline)) void +fn1 (void) +{ + int i; + #pragma omp simd + for (i = 0; i < N; i++) + a[i] = foo (c[i], a[i], b[i]) + 6; + #pragma omp simd + for (i = 0; i < N; i++) + c[i] = bar (a[i], b[i], c[i]) * 2; +} + +__attribute__((noinline)) void +fn2 (void) +{ + int i; + #pragma omp simd + for (i = 0; i < N; i++) + { + a[i] = foo (c[i], a[i], b[i]) + 6; + d[i]++; + } + #pragma omp simd + for (i = 0; i < N; i++) + { + c[i] = bar (a[i], b[i], c[i]) * 2; + d[i] /= 2; + } +} + +__attribute__((noinline)) void +fn3 (void) +{ + int i; + for (i = 0; i < N; i++) + { + a[i] = i * 2; + b[i] = 17 + (i % 37); + c[i] = (i & 63); + d[i] = 16 + i; + } +} + +int +main () +{ + int i; + check_vect (); + fn3 (); + fn1 (); + for (i = 0; i < N; i++) + if (a[i] != i * 2 + 23 + (i % 37) + (i & 63) + || b[i] != 17 + (i % 37) + || c[i] != i * 4 + 80 + 4 * (i % 37) + 4 * (i & 63)) + abort (); + fn3 (); + fn2 (); + for (i = 0; i < N; i++) + if (a[i] != i * 2 + 23 + (i % 37) + (i & 63) + || b[i] != 17 + (i % 37) + || c[i] != i * 4 + 80 + 4 * (i % 37) + 4 * (i & 63) + || d[i] != ((unsigned char) (17 + i)) / 2) + abort (); + return 0; +} + +/* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-10.h b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-10.h new file mode 100644 index 00000000000..ac3b81f8616 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-10.h @@ -0,0 +1,4 @@ +#pragma omp declare simd notinbranch +extern int foo (long int a, int b, int c); +#pragma omp declare simd notinbranch +extern long int bar (int a, int b, long int c); diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-10a.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-10a.c new file mode 100644 index 00000000000..cd42e7c8bc8 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-10a.c @@ -0,0 +1,17 @@ +/* { dg-do compile } */ + +#include "vect-simd-clone-10.h" + +#pragma omp declare simd notinbranch +extern int +foo (long int a, int b, int c) +{ + return a + b + c; +} + +#pragma omp declare simd notinbranch +extern long int +bar (int a, int b, long int c) +{ + return a + b + c; +} diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-11.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-11.c new file mode 100644 index 00000000000..4cccf852d0e --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-11.c @@ -0,0 +1,66 @@ +/* { dg-additional-options "-fopenmp-simd" } */ +/* { dg-additional-options "-mavx" { target avx_runtime } } */ + +#include "tree-vect.h" + +#ifndef N +#define N 1024 +#endif + +int a[N] __attribute__((aligned (32))); + +#pragma omp declare simd linear(a) linear(b:3) linear(c:6) notinbranch +__attribute__((noinline)) int +foo (int a, int b, int c) +{ + return a ^ (b * 512) ^ (c * 512 * 512); +} + +__attribute__((noinline, noclone)) void +bar (int *d) +{ + int i, j, k; + for (i = 0, j = 0, k = 0; i < N / 2; i++, j++, k += 3) + d[i] = foo (j, i * 3, 2 * k + 2); +} + +#if 0 +__attribute__((noinline, noclone)) void +baz (int *d) +{ + long int i, j, k; + for (i = 0, j = 0, k = 0; i < N / 2; + i = (int) i + 1, j = (int) j + 1, k = (int) k + 3) + d[i] = foo (j, i * 3, 2 * k + 2); +} +#endif + +int +main () +{ + int i; + check_vect (); + if (sizeof (int) * __CHAR_BIT__ < 32) + return 0; + bar (a + 7); + for (i = 0; i < N / 2; i++) + if (a[i + 7] != (i ^ (i * 3 * 512) ^ (((i * 6) + 2) * 512 * 512))) + abort (); + bar (a); + for (i = 0; i < N / 2; i++) + if (a[i] != (i ^ (i * 3 * 512) ^ (((i * 6) + 2) * 512 * 512))) + abort (); +#if 0 + baz (a + 7); + for (i = 0; i < N / 2; i++) + if (a[i + 7] != (i ^ (i * 3 * 512) ^ (((i * 6) + 2) * 512 * 512))) + abort (); + baz (a); + for (i = 0; i < N / 2; i++) + if (a[i] != (i ^ (i * 3 * 512) ^ (((i * 6) + 2) * 512 * 512))) + abort (); +#endif + return 0; +} + +/* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-2.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-2.c new file mode 100644 index 00000000000..4447607ef51 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-2.c @@ -0,0 +1,52 @@ +/* { dg-additional-options "-fopenmp-simd" } */ +/* { dg-additional-options "-mavx" { target avx_runtime } } */ + +#include "tree-vect.h" + +#ifndef N +#define N 1024 +#endif + +int array[N] __attribute__((aligned (32))); + +#pragma omp declare simd simdlen(4) notinbranch aligned(a:16) uniform(a) linear(b) +#pragma omp declare simd simdlen(4) notinbranch aligned(a:32) uniform(a) linear(b) +#pragma omp declare simd simdlen(8) notinbranch aligned(a:16) uniform(a) linear(b) +#pragma omp declare simd simdlen(8) notinbranch aligned(a:32) uniform(a) linear(b) +__attribute__((noinline)) void +foo (int *a, int b, int c) +{ + a[b] = c; +} + +__attribute__((noinline, noclone)) void +bar () +{ + int i; +#pragma omp simd + for (i = 0; i < N; ++i) + foo (array, i, i * array[i]); +} + +__attribute__((noinline, noclone)) void +baz () +{ + int i; + for (i = 0; i < N; i++) + array[i] = 5 * (i & 7); +} + +int +main () +{ + int i; + check_vect (); + baz (); + bar (); + for (i = 0; i < N; i++) + if (array[i] != 5 * (i & 7) * i) + abort (); + return 0; +} + +/* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-3.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-3.c new file mode 100644 index 00000000000..222d88e3016 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-3.c @@ -0,0 +1,45 @@ +/* { dg-additional-options "-fopenmp-simd" } */ +/* { dg-additional-options "-mavx" { target avx_runtime } } */ + +#include "tree-vect.h" + +#ifndef N +#define N 1024 +#endif + +int d[N], e[N]; + +#pragma omp declare simd simdlen(4) notinbranch uniform(b) linear(c:3) +__attribute__((noinline)) int +foo (int a, int b, int c) +{ + if (a < 30) + return 5; + return a + b + c; +} + +__attribute__((noinline, noclone)) void +bar () +{ + int i; +#pragma omp simd + for (i = 0; i < N; ++i) + { + d[i] = foo (i, 123, i * 3); + e[i] = e[i] + i; + } +} + +int +main () +{ + int i; + check_vect (); + bar (); + for (i = 0; i < N; i++) + if (d[i] != (i < 30 ? 5 : i * 4 + 123) || e[i] != i) + abort (); + return 0; +} + +/* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-4.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-4.c new file mode 100644 index 00000000000..5b0a93a53d7 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-4.c @@ -0,0 +1,48 @@ +/* { dg-additional-options "-fopenmp-simd" } */ +/* { dg-additional-options "-mavx" { target avx_runtime } } */ + +#include "tree-vect.h" + +#ifndef N +#define N 1024 +#endif + +float d[N]; +int e[N]; +unsigned short f[N]; + +#pragma omp declare simd simdlen(8) notinbranch uniform(b) +__attribute__((noinline)) float +foo (float a, float b, float c) +{ + if (a < 30) + return 5.0f; + return a + b + c; +} + +__attribute__((noinline, noclone)) void +bar () +{ + int i; +#pragma omp simd + for (i = 0; i < N; ++i) + { + d[i] = foo (i, 123, i * 3); + e[i] = e[i] * 3; + f[i] = f[i] + 1; + } +} + +int +main () +{ + int i; + check_vect (); + bar (); + for (i = 0; i < N; i++) + if (d[i] != (i < 30 ? 5.0f : i * 4 + 123.0f) || e[i] || f[i] != 1) + abort (); + return 0; +} + +/* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-5.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-5.c new file mode 100644 index 00000000000..fd1d5ffd322 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-5.c @@ -0,0 +1,43 @@ +/* { dg-additional-options "-fopenmp-simd" } */ +/* { dg-additional-options "-mavx" { target avx_runtime } } */ + +#include "tree-vect.h" + +#ifndef N +#define N 1024 +#endif + +int d[N], e[N]; + +#pragma omp declare simd simdlen(4) notinbranch uniform(b) linear(c:3) +__attribute__((noinline)) long long int +foo (int a, int b, int c) +{ + return a + b + c; +} + +__attribute__((noinline, noclone)) void +bar () +{ + int i; +#pragma omp simd + for (i = 0; i < N; ++i) + { + d[i] = foo (i, 123, i * 3); + e[i] = e[i] + i; + } +} + +int +main () +{ + int i; + check_vect (); + bar (); + for (i = 0; i < N; i++) + if (d[i] != i * 4 + 123 || e[i] != i) + abort (); + return 0; +} + +/* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-6.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-6.c new file mode 100644 index 00000000000..5e5641476c5 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-6.c @@ -0,0 +1,74 @@ +/* { dg-additional-options "-fopenmp-simd" } */ +/* { dg-additional-options "-mavx" { target avx_runtime } } */ + +#include "tree-vect.h" + +#ifndef N +#define N 1024 +#endif + +int a[N]; +long long int b[N]; +short c[N]; + +#pragma omp declare simd +#pragma omp declare simd uniform(b) linear(c:3) +__attribute__((noinline)) short +foo (int a, long long int b, short c) +{ + return a + b + c; +} + +__attribute__((noinline, noclone)) void +bar (int x) +{ + int i; + if (x == 0) + { + #pragma omp simd + for (i = 0; i < N; i++) + c[i] = foo (a[i], b[i], c[i]); + } + else + { + #pragma omp simd + for (i = 0; i < N; i++) + c[i] = foo (a[i], x, i * 3); + } +} + +__attribute__((noinline, noclone)) void +baz (void) +{ + int i; + for (i = 0; i < N; i++) + { + a[i] = 2 * i; + b[i] = -7 * i + 6; + c[i] = (i & 31) << 4; + } +} + +int +main () +{ + int i; + check_vect (); + baz (); + bar (0); + for (i = 0; i < N; i++) + if (a[i] != 2 * i || b[i] != 6 - 7 * i + || c[i] != 6 - 5 * i + ((i & 31) << 4)) + abort (); + else + a[i] = c[i]; + bar (17); + for (i = 0; i < N; i++) + if (a[i] != 6 - 5 * i + ((i & 31) << 4) + || b[i] != 6 - 7 * i + || c[i] != 23 - 2 * i + ((i & 31) << 4)) + abort (); + return 0; +} + +/* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-7.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-7.c new file mode 100644 index 00000000000..24856eaa41a --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-7.c @@ -0,0 +1,74 @@ +/* { dg-additional-options "-fopenmp-simd" } */ +/* { dg-additional-options "-mavx" { target avx_runtime } } */ + +#include "tree-vect.h" + +#ifndef N +#define N 1024 +#endif + +int a[N]; +long long int b[N]; +short c[N]; + +#pragma omp declare simd +#pragma omp declare simd uniform(b) linear(c:3) +__attribute__((noinline)) short +foo (int a, long long int b, int c) +{ + return a + b + c; +} + +__attribute__((noinline, noclone)) void +bar (int x) +{ + int i; + if (x == 0) + { + #pragma omp simd + for (i = 0; i < N; i++) + c[i] = foo (a[i], b[i], c[i]); + } + else + { + #pragma omp simd + for (i = 0; i < N; i++) + c[i] = foo (a[i], x, i * 3); + } +} + +__attribute__((noinline, noclone)) void +baz (void) +{ + int i; + for (i = 0; i < N; i++) + { + a[i] = 2 * i; + b[i] = -7 * i + 6; + c[i] = (i & 31) << 4; + } +} + +int +main () +{ + int i; + check_vect (); + baz (); + bar (0); + for (i = 0; i < N; i++) + if (a[i] != 2 * i || b[i] != 6 - 7 * i + || c[i] != 6 - 5 * i + ((i & 31) << 4)) + abort (); + else + a[i] = c[i]; + bar (17); + for (i = 0; i < N; i++) + if (a[i] != 6 - 5 * i + ((i & 31) << 4) + || b[i] != 6 - 7 * i + || c[i] != 23 - 2 * i + ((i & 31) << 4)) + abort (); + return 0; +} + +/* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-8.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-8.c new file mode 100644 index 00000000000..19c25c9db49 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-8.c @@ -0,0 +1,94 @@ +/* { dg-additional-options "-fopenmp-simd" } */ +/* { dg-additional-options "-mavx" { target avx_runtime } } */ + +#include "tree-vect.h" + +#ifndef N +#define N 1024 +#endif + +int a[N], b[N]; +long int c[N]; +unsigned char d[N]; + +#pragma omp declare simd simdlen(8) notinbranch +__attribute__((noinline)) int +foo (long int a, int b, int c) +{ + return a + b + c; +} + +#pragma omp declare simd simdlen(8) notinbranch +__attribute__((noinline)) long int +bar (int a, int b, long int c) +{ + return a + b + c; +} + +__attribute__((noinline)) void +fn1 (void) +{ + int i; + #pragma omp simd + for (i = 0; i < N; i++) + a[i] = foo (c[i], a[i], b[i]) + 6; + #pragma omp simd + for (i = 0; i < N; i++) + c[i] = bar (a[i], b[i], c[i]) * 2; +} + +__attribute__((noinline)) void +fn2 (void) +{ + int i; + #pragma omp simd + for (i = 0; i < N; i++) + { + a[i] = foo (c[i], a[i], b[i]) + 6; + d[i]++; + } + #pragma omp simd + for (i = 0; i < N; i++) + { + c[i] = bar (a[i], b[i], c[i]) * 2; + d[i] /= 2; + } +} + +__attribute__((noinline)) void +fn3 (void) +{ + int i; + for (i = 0; i < N; i++) + { + a[i] = i * 2; + b[i] = 17 + (i % 37); + c[i] = (i & 63); + d[i] = 16 + i; + } +} + +int +main () +{ + int i; + check_vect (); + fn3 (); + fn1 (); + for (i = 0; i < N; i++) + if (a[i] != i * 2 + 23 + (i % 37) + (i & 63) + || b[i] != 17 + (i % 37) + || c[i] != i * 4 + 80 + 4 * (i % 37) + 4 * (i & 63)) + abort (); + fn3 (); + fn2 (); + for (i = 0; i < N; i++) + if (a[i] != i * 2 + 23 + (i % 37) + (i & 63) + || b[i] != 17 + (i % 37) + || c[i] != i * 4 + 80 + 4 * (i % 37) + 4 * (i & 63) + || d[i] != ((unsigned char) (17 + i)) / 2) + abort (); + return 0; +} + +/* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-9.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-9.c new file mode 100644 index 00000000000..95156b90f01 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-9.c @@ -0,0 +1,94 @@ +/* { dg-additional-options "-fopenmp-simd" } */ +/* { dg-additional-options "-mavx" { target avx_runtime } } */ + +#include "tree-vect.h" + +#ifndef N +#define N 1024 +#endif + +int a[N], b[N]; +long int c[N]; +unsigned char d[N]; + +#pragma omp declare simd notinbranch +__attribute__((noinline)) static int +foo (long int a, int b, int c) +{ + return a + b + c; +} + +#pragma omp declare simd notinbranch +__attribute__((noinline)) static long int +bar (int a, int b, long int c) +{ + return a + b + c; +} + +__attribute__((noinline)) void +fn1 (void) +{ + int i; + #pragma omp simd + for (i = 0; i < N; i++) + a[i] = foo (c[i], a[i], b[i]) + 6; + #pragma omp simd + for (i = 0; i < N; i++) + c[i] = bar (a[i], b[i], c[i]) * 2; +} + +__attribute__((noinline)) void +fn2 (void) +{ + int i; + #pragma omp simd + for (i = 0; i < N; i++) + { + a[i] = foo (c[i], a[i], b[i]) + 6; + d[i]++; + } + #pragma omp simd + for (i = 0; i < N; i++) + { + c[i] = bar (a[i], b[i], c[i]) * 2; + d[i] /= 2; + } +} + +__attribute__((noinline)) void +fn3 (void) +{ + int i; + for (i = 0; i < N; i++) + { + a[i] = i * 2; + b[i] = 17 + (i % 37); + c[i] = (i & 63); + d[i] = 16 + i; + } +} + +int +main () +{ + int i; + check_vect (); + fn3 (); + fn1 (); + for (i = 0; i < N; i++) + if (a[i] != i * 2 + 23 + (i % 37) + (i & 63) + || b[i] != 17 + (i % 37) + || c[i] != i * 4 + 80 + 4 * (i % 37) + 4 * (i & 63)) + abort (); + fn3 (); + fn2 (); + for (i = 0; i < N; i++) + if (a[i] != i * 2 + 23 + (i % 37) + (i & 63) + || b[i] != 17 + (i % 37) + || c[i] != i * 4 + 80 + 4 * (i % 37) + 4 * (i & 63) + || d[i] != ((unsigned char) (17 + i)) / 2) + abort (); + return 0; +} + +/* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/tree-core.h b/gcc/tree-core.h index 94ff7f85b16..e2750e0de7e 100644 --- a/gcc/tree-core.h +++ b/gcc/tree-core.h @@ -903,6 +903,9 @@ struct GTY(()) tree_base { CALL_ALLOCA_FOR_VAR_P in CALL_EXPR + OMP_CLAUSE_LINEAR_VARIABLE_STRIDE in + OMP_CLAUSE_LINEAR + side_effects_flag: TREE_SIDE_EFFECTS in diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h index 02d71cd3961..4d570b19fc8 100644 --- a/gcc/tree-pass.h +++ b/gcc/tree-pass.h @@ -472,6 +472,7 @@ extern ipa_opt_pass_d *make_pass_ipa_reference (gcc::context *ctxt); extern ipa_opt_pass_d *make_pass_ipa_pure_const (gcc::context *ctxt); extern simple_ipa_opt_pass *make_pass_ipa_pta (gcc::context *ctxt); extern simple_ipa_opt_pass *make_pass_ipa_tm (gcc::context *ctxt); +extern simple_ipa_opt_pass *make_pass_omp_simd_clone (gcc::context *ctxt); extern ipa_opt_pass_d *make_pass_ipa_profile (gcc::context *ctxt); extern ipa_opt_pass_d *make_pass_ipa_cdtor_merge (gcc::context *ctxt); diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c index 82616450e1e..a61f2a1a868 100644 --- a/gcc/tree-vect-data-refs.c +++ b/gcc/tree-vect-data-refs.c @@ -53,6 +53,7 @@ along with GCC; see the file COPYING3. If not see #include "tree-scalar-evolution.h" #include "tree-vectorizer.h" #include "diagnostic-core.h" +#include "cgraph.h" /* Need to include rtl.h, expr.h, etc. for optabs. */ #include "expr.h" #include "optabs.h" @@ -3167,10 +3168,11 @@ vect_analyze_data_refs (loop_vec_info loop_vinfo, if (loop_vinfo) { + basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo); + loop = LOOP_VINFO_LOOP (loop_vinfo); - if (!find_loop_nest (loop, &LOOP_VINFO_LOOP_NEST (loop_vinfo)) - || find_data_references_in_loop - (loop, &LOOP_VINFO_DATAREFS (loop_vinfo))) + datarefs = LOOP_VINFO_DATAREFS (loop_vinfo); + if (!find_loop_nest (loop, &LOOP_VINFO_LOOP_NEST (loop_vinfo))) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -3179,7 +3181,57 @@ vect_analyze_data_refs (loop_vec_info loop_vinfo, return false; } - datarefs = LOOP_VINFO_DATAREFS (loop_vinfo); + for (i = 0; i < loop->num_nodes; i++) + { + gimple_stmt_iterator gsi; + + for (gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi)) + { + gimple stmt = gsi_stmt (gsi); + if (!find_data_references_in_stmt (loop, stmt, &datarefs)) + { + if (is_gimple_call (stmt) && loop->safelen) + { + tree fndecl = gimple_call_fndecl (stmt), op; + if (fndecl != NULL_TREE) + { + struct cgraph_node *node = cgraph_get_node (fndecl); + if (node != NULL && node->simd_clones != NULL) + { + unsigned int j, n = gimple_call_num_args (stmt); + for (j = 0; j < n; j++) + { + op = gimple_call_arg (stmt, j); + if (DECL_P (op) + || (REFERENCE_CLASS_P (op) + && get_base_address (op))) + break; + } + op = gimple_call_lhs (stmt); + /* Ignore #pragma omp declare simd functions + if they don't have data references in the + call stmt itself. */ + if (j == n + && !(op + && (DECL_P (op) + || (REFERENCE_CLASS_P (op) + && get_base_address (op))))) + continue; + } + } + } + LOOP_VINFO_DATAREFS (loop_vinfo) = datarefs; + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "not vectorized: loop contains function " + "calls or data references that cannot " + "be analyzed\n"); + return false; + } + } + } + + LOOP_VINFO_DATAREFS (loop_vinfo) = datarefs; } else { diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c index 50c9d9caa34..42a78de3265 100644 --- a/gcc/tree-vect-loop.c +++ b/gcc/tree-vect-loop.c @@ -376,6 +376,19 @@ vect_determine_vectorization_factor (loop_vec_info loop_vinfo) if (gimple_get_lhs (stmt) == NULL_TREE) { + if (is_gimple_call (stmt)) + { + /* Ignore calls with no lhs. These must be calls to + #pragma omp simd functions, and what vectorization factor + it really needs can't be determined until + vectorizable_simd_clone_call. */ + if (!analyze_pattern_stmt && gsi_end_p (pattern_def_si)) + { + pattern_def_seq = NULL; + gsi_next (&si); + } + continue; + } if (dump_enabled_p ()) { dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -5699,7 +5712,6 @@ vect_transform_loop (loop_vec_info loop_vinfo) int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo); bool grouped_store; bool slp_scheduled = false; - unsigned int nunits; gimple stmt, pattern_stmt; gimple_seq pattern_def_seq = NULL; gimple_stmt_iterator pattern_def_si = gsi_none (); @@ -5957,16 +5969,18 @@ vect_transform_loop (loop_vec_info loop_vinfo) transform_pattern_stmt = false; } - gcc_assert (STMT_VINFO_VECTYPE (stmt_info)); - nunits = (unsigned int) TYPE_VECTOR_SUBPARTS ( - STMT_VINFO_VECTYPE (stmt_info)); - if (!STMT_SLP_TYPE (stmt_info) - && nunits != (unsigned int) vectorization_factor - && dump_enabled_p ()) - /* For SLP VF is set according to unrolling factor, and not to - vector size, hence for SLP this print is not valid. */ - dump_printf_loc (MSG_NOTE, vect_location, - "multiple-types.\n"); + if (STMT_VINFO_VECTYPE (stmt_info)) + { + unsigned int nunits + = (unsigned int) + TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info)); + if (!STMT_SLP_TYPE (stmt_info) + && nunits != (unsigned int) vectorization_factor + && dump_enabled_p ()) + /* For SLP VF is set according to unrolling factor, and not + to vector size, hence for SLP this print is not valid. */ + dump_printf_loc (MSG_NOTE, vect_location, "multiple-types.\n"); + } /* SLP. Schedule all the SLP instances when the first SLP stmt is reached. */ diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c index 72dfacd3151..ff7b59aa52d 100644 --- a/gcc/tree-vect-stmts.c +++ b/gcc/tree-vect-stmts.c @@ -46,12 +46,15 @@ along with GCC; see the file COPYING3. If not see #include "tree-ssanames.h" #include "tree-ssa-loop-manip.h" #include "cfgloop.h" +#include "tree-ssa-loop.h" +#include "tree-scalar-evolution.h" #include "expr.h" #include "recog.h" /* FIXME: for insn_data */ #include "optabs.h" #include "diagnostic-core.h" #include "tree-vectorizer.h" #include "dumpfile.h" +#include "cgraph.h" /* For lang_hooks.types.type_for_mode. */ #include "langhooks.h" @@ -1735,11 +1738,11 @@ vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt, if (!is_gimple_call (stmt)) return false; - if (TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME) + if (gimple_call_lhs (stmt) == NULL_TREE + || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME) return false; - if (stmt_can_throw_internal (stmt)) - return false; + gcc_checking_assert (!stmt_can_throw_internal (stmt)); vectype_out = STMT_VINFO_VECTYPE (stmt_info); @@ -2082,10 +2085,6 @@ vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt, vargs.release (); - /* Update the exception handling table with the vector stmt if necessary. */ - if (maybe_clean_or_replace_eh_stmt (stmt, *vec_stmt)) - gimple_purge_dead_eh_edges (gimple_bb (stmt)); - /* The call in STMT might prevent it from being removed in dce. We however cannot remove it here, due to the way the ssa name it defines is mapped to the new definition. So just replace @@ -2109,6 +2108,605 @@ vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt, } +struct simd_call_arg_info +{ + tree vectype; + tree op; + enum vect_def_type dt; + HOST_WIDE_INT linear_step; + unsigned int align; +}; + +/* Function vectorizable_simd_clone_call. + + Check if STMT performs a function call that can be vectorized + by calling a simd clone of the function. + If VEC_STMT is also passed, vectorize the STMT: create a vectorized + stmt to replace it, put it in VEC_STMT, and insert it at BSI. + Return FALSE if not a vectorizable STMT, TRUE otherwise. */ + +static bool +vectorizable_simd_clone_call (gimple stmt, gimple_stmt_iterator *gsi, + gimple *vec_stmt, slp_tree slp_node) +{ + tree vec_dest; + tree scalar_dest; + tree op, type; + tree vec_oprnd0 = NULL_TREE; + stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info; + tree vectype; + unsigned int nunits; + loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); + bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info); + struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL; + tree fndecl, new_temp, def; + gimple def_stmt; + gimple new_stmt = NULL; + int ncopies, j; + vec<simd_call_arg_info> arginfo = vNULL; + vec<tree> vargs = vNULL; + size_t i, nargs; + tree lhs, rtype, ratype; + vec<constructor_elt, va_gc> *ret_ctor_elts; + + /* Is STMT a vectorizable call? */ + if (!is_gimple_call (stmt)) + return false; + + fndecl = gimple_call_fndecl (stmt); + if (fndecl == NULL_TREE) + return false; + + struct cgraph_node *node = cgraph_get_node (fndecl); + if (node == NULL || node->simd_clones == NULL) + return false; + + if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo) + return false; + + if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def) + return false; + + if (gimple_call_lhs (stmt) + && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME) + return false; + + gcc_checking_assert (!stmt_can_throw_internal (stmt)); + + vectype = STMT_VINFO_VECTYPE (stmt_info); + + if (loop_vinfo && nested_in_vect_loop_p (loop, stmt)) + return false; + + /* FORNOW */ + if (slp_node || PURE_SLP_STMT (stmt_info)) + return false; + + /* Process function arguments. */ + nargs = gimple_call_num_args (stmt); + + /* Bail out if the function has zero arguments. */ + if (nargs == 0) + return false; + + arginfo.create (nargs); + + for (i = 0; i < nargs; i++) + { + simd_call_arg_info thisarginfo; + affine_iv iv; + + thisarginfo.linear_step = 0; + thisarginfo.align = 0; + thisarginfo.op = NULL_TREE; + + op = gimple_call_arg (stmt, i); + if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo, + &def_stmt, &def, &thisarginfo.dt, + &thisarginfo.vectype) + || thisarginfo.dt == vect_uninitialized_def) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "use not simple.\n"); + arginfo.release (); + return false; + } + + if (thisarginfo.dt == vect_constant_def + || thisarginfo.dt == vect_external_def) + gcc_assert (thisarginfo.vectype == NULL_TREE); + else + gcc_assert (thisarginfo.vectype != NULL_TREE); + + if (thisarginfo.dt != vect_constant_def + && thisarginfo.dt != vect_external_def + && loop_vinfo + && TREE_CODE (op) == SSA_NAME + && simple_iv (loop, loop_containing_stmt (stmt), op, &iv, false) + && tree_fits_shwi_p (iv.step)) + { + thisarginfo.linear_step = tree_to_shwi (iv.step); + thisarginfo.op = iv.base; + } + else if ((thisarginfo.dt == vect_constant_def + || thisarginfo.dt == vect_external_def) + && POINTER_TYPE_P (TREE_TYPE (op))) + thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT; + + arginfo.quick_push (thisarginfo); + } + + unsigned int badness = 0; + struct cgraph_node *bestn = NULL; + if (STMT_VINFO_SIMD_CLONE_FNDECL (stmt_info)) + bestn = cgraph_get_node (STMT_VINFO_SIMD_CLONE_FNDECL (stmt_info)); + else + for (struct cgraph_node *n = node->simd_clones; n != NULL; + n = n->simdclone->next_clone) + { + unsigned int this_badness = 0; + if (n->simdclone->simdlen + > (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo) + || n->simdclone->nargs != nargs) + continue; + if (n->simdclone->simdlen + < (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo)) + this_badness += (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo)) + - exact_log2 (n->simdclone->simdlen)) * 1024; + if (n->simdclone->inbranch) + this_badness += 2048; + int target_badness = targetm.simd_clone.usable (n); + if (target_badness < 0) + continue; + this_badness += target_badness * 512; + /* FORNOW: Have to add code to add the mask argument. */ + if (n->simdclone->inbranch) + continue; + for (i = 0; i < nargs; i++) + { + switch (n->simdclone->args[i].arg_type) + { + case SIMD_CLONE_ARG_TYPE_VECTOR: + if (!useless_type_conversion_p + (n->simdclone->args[i].orig_type, + TREE_TYPE (gimple_call_arg (stmt, i)))) + i = -1; + else if (arginfo[i].dt == vect_constant_def + || arginfo[i].dt == vect_external_def + || arginfo[i].linear_step) + this_badness += 64; + break; + case SIMD_CLONE_ARG_TYPE_UNIFORM: + if (arginfo[i].dt != vect_constant_def + && arginfo[i].dt != vect_external_def) + i = -1; + break; + case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP: + if (arginfo[i].dt == vect_constant_def + || arginfo[i].dt == vect_external_def + || (arginfo[i].linear_step + != n->simdclone->args[i].linear_step)) + i = -1; + break; + case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP: + /* FORNOW */ + i = -1; + break; + case SIMD_CLONE_ARG_TYPE_MASK: + gcc_unreachable (); + } + if (i == (size_t) -1) + break; + if (n->simdclone->args[i].alignment > arginfo[i].align) + { + i = -1; + break; + } + if (arginfo[i].align) + this_badness += (exact_log2 (arginfo[i].align) + - exact_log2 (n->simdclone->args[i].alignment)); + } + if (i == (size_t) -1) + continue; + if (bestn == NULL || this_badness < badness) + { + bestn = n; + badness = this_badness; + } + } + + if (bestn == NULL) + { + arginfo.release (); + return false; + } + + for (i = 0; i < nargs; i++) + if ((arginfo[i].dt == vect_constant_def + || arginfo[i].dt == vect_external_def) + && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR) + { + arginfo[i].vectype + = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt, + i))); + if (arginfo[i].vectype == NULL + || (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype) + > bestn->simdclone->simdlen)) + { + arginfo.release (); + return false; + } + } + + fndecl = bestn->decl; + nunits = bestn->simdclone->simdlen; + ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits; + + /* If the function isn't const, only allow it in simd loops where user + has asserted that at least nunits consecutive iterations can be + performed using SIMD instructions. */ + if ((loop == NULL || (unsigned) loop->safelen < nunits) + && gimple_vuse (stmt)) + { + arginfo.release (); + return false; + } + + /* Sanity check: make sure that at least one copy of the vectorized stmt + needs to be generated. */ + gcc_assert (ncopies >= 1); + + if (!vec_stmt) /* transformation not required. */ + { + STMT_VINFO_SIMD_CLONE_FNDECL (stmt_info) = bestn->decl; + STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type; + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, + "=== vectorizable_simd_clone_call ===\n"); +/* vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */ + arginfo.release (); + return true; + } + + /** Transform. **/ + + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n"); + + /* Handle def. */ + scalar_dest = gimple_call_lhs (stmt); + vec_dest = NULL_TREE; + rtype = NULL_TREE; + ratype = NULL_TREE; + if (scalar_dest) + { + vec_dest = vect_create_destination_var (scalar_dest, vectype); + rtype = TREE_TYPE (TREE_TYPE (fndecl)); + if (TREE_CODE (rtype) == ARRAY_TYPE) + { + ratype = rtype; + rtype = TREE_TYPE (ratype); + } + } + + prev_stmt_info = NULL; + for (j = 0; j < ncopies; ++j) + { + /* Build argument list for the vectorized call. */ + if (j == 0) + vargs.create (nargs); + else + vargs.truncate (0); + + for (i = 0; i < nargs; i++) + { + unsigned int k, l, m, o; + tree atype; + op = gimple_call_arg (stmt, i); + switch (bestn->simdclone->args[i].arg_type) + { + case SIMD_CLONE_ARG_TYPE_VECTOR: + atype = bestn->simdclone->args[i].vector_type; + o = nunits / TYPE_VECTOR_SUBPARTS (atype); + for (m = j * o; m < (j + 1) * o; m++) + { + if (TYPE_VECTOR_SUBPARTS (atype) + < TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)) + { + unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (atype)); + k = (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype) + / TYPE_VECTOR_SUBPARTS (atype)); + gcc_assert ((k & (k - 1)) == 0); + if (m == 0) + vec_oprnd0 + = vect_get_vec_def_for_operand (op, stmt, NULL); + else + { + vec_oprnd0 = arginfo[i].op; + if ((m & (k - 1)) == 0) + vec_oprnd0 + = vect_get_vec_def_for_stmt_copy (arginfo[i].dt, + vec_oprnd0); + } + arginfo[i].op = vec_oprnd0; + vec_oprnd0 + = build3 (BIT_FIELD_REF, atype, vec_oprnd0, + size_int (prec), + bitsize_int ((m & (k - 1)) * prec)); + new_stmt + = gimple_build_assign (make_ssa_name (atype, NULL), + vec_oprnd0); + vect_finish_stmt_generation (stmt, new_stmt, gsi); + vargs.safe_push (gimple_assign_lhs (new_stmt)); + } + else + { + k = (TYPE_VECTOR_SUBPARTS (atype) + / TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)); + gcc_assert ((k & (k - 1)) == 0); + vec<constructor_elt, va_gc> *ctor_elts; + if (k != 1) + vec_alloc (ctor_elts, k); + else + ctor_elts = NULL; + for (l = 0; l < k; l++) + { + if (m == 0 && l == 0) + vec_oprnd0 + = vect_get_vec_def_for_operand (op, stmt, NULL); + else + vec_oprnd0 + = vect_get_vec_def_for_stmt_copy (arginfo[i].dt, + arginfo[i].op); + arginfo[i].op = vec_oprnd0; + if (k == 1) + break; + CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE, + vec_oprnd0); + } + if (k == 1) + vargs.safe_push (vec_oprnd0); + else + { + vec_oprnd0 = build_constructor (atype, ctor_elts); + new_stmt + = gimple_build_assign (make_ssa_name (atype, NULL), + vec_oprnd0); + vect_finish_stmt_generation (stmt, new_stmt, gsi); + vargs.safe_push (gimple_assign_lhs (new_stmt)); + } + } + } + break; + case SIMD_CLONE_ARG_TYPE_UNIFORM: + vargs.safe_push (op); + break; + case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP: + if (j == 0) + { + gimple_seq stmts; + arginfo[i].op + = force_gimple_operand (arginfo[i].op, &stmts, true, + NULL_TREE); + if (stmts != NULL) + { + basic_block new_bb; + edge pe = loop_preheader_edge (loop); + new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts); + gcc_assert (!new_bb); + } + tree phi_res = copy_ssa_name (op, NULL); + gimple new_phi = create_phi_node (phi_res, loop->header); + set_vinfo_for_stmt (new_phi, + new_stmt_vec_info (new_phi, loop_vinfo, + NULL)); + add_phi_arg (new_phi, arginfo[i].op, + loop_preheader_edge (loop), UNKNOWN_LOCATION); + enum tree_code code + = POINTER_TYPE_P (TREE_TYPE (op)) + ? POINTER_PLUS_EXPR : PLUS_EXPR; + tree type = POINTER_TYPE_P (TREE_TYPE (op)) + ? sizetype : TREE_TYPE (op); + double_int cst + = double_int::from_shwi + (bestn->simdclone->args[i].linear_step); + cst *= double_int::from_uhwi (ncopies * nunits); + tree tcst = double_int_to_tree (type, cst); + tree phi_arg = copy_ssa_name (op, NULL); + new_stmt = gimple_build_assign_with_ops (code, phi_arg, + phi_res, tcst); + gimple_stmt_iterator si = gsi_after_labels (loop->header); + gsi_insert_after (&si, new_stmt, GSI_NEW_STMT); + set_vinfo_for_stmt (new_stmt, + new_stmt_vec_info (new_stmt, loop_vinfo, + NULL)); + add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop), + UNKNOWN_LOCATION); + arginfo[i].op = phi_res; + vargs.safe_push (phi_res); + } + else + { + enum tree_code code + = POINTER_TYPE_P (TREE_TYPE (op)) + ? POINTER_PLUS_EXPR : PLUS_EXPR; + tree type = POINTER_TYPE_P (TREE_TYPE (op)) + ? sizetype : TREE_TYPE (op); + double_int cst + = double_int::from_shwi + (bestn->simdclone->args[i].linear_step); + cst *= double_int::from_uhwi (j * nunits); + tree tcst = double_int_to_tree (type, cst); + new_temp = make_ssa_name (TREE_TYPE (op), NULL); + new_stmt + = gimple_build_assign_with_ops (code, new_temp, + arginfo[i].op, tcst); + vect_finish_stmt_generation (stmt, new_stmt, gsi); + vargs.safe_push (new_temp); + } + break; + case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP: + default: + gcc_unreachable (); + } + } + + new_stmt = gimple_build_call_vec (fndecl, vargs); + if (vec_dest) + { + gcc_assert (ratype || TYPE_VECTOR_SUBPARTS (rtype) == nunits); + if (ratype) + new_temp = create_tmp_var (ratype, NULL); + else if (TYPE_VECTOR_SUBPARTS (vectype) + == TYPE_VECTOR_SUBPARTS (rtype)) + new_temp = make_ssa_name (vec_dest, new_stmt); + else + new_temp = make_ssa_name (rtype, new_stmt); + gimple_call_set_lhs (new_stmt, new_temp); + } + vect_finish_stmt_generation (stmt, new_stmt, gsi); + + if (vec_dest) + { + if (TYPE_VECTOR_SUBPARTS (vectype) < nunits) + { + unsigned int k, l; + unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (vectype)); + k = nunits / TYPE_VECTOR_SUBPARTS (vectype); + gcc_assert ((k & (k - 1)) == 0); + for (l = 0; l < k; l++) + { + tree t; + if (ratype) + { + t = build_fold_addr_expr (new_temp); + t = build2 (MEM_REF, vectype, t, + build_int_cst (TREE_TYPE (t), + l * prec / BITS_PER_UNIT)); + } + else + t = build3 (BIT_FIELD_REF, vectype, new_temp, + size_int (prec), bitsize_int (l * prec)); + new_stmt + = gimple_build_assign (make_ssa_name (vectype, NULL), t); + vect_finish_stmt_generation (stmt, new_stmt, gsi); + if (j == 0 && l == 0) + STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt; + else + STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt; + + prev_stmt_info = vinfo_for_stmt (new_stmt); + } + + if (ratype) + { + tree clobber = build_constructor (ratype, NULL); + TREE_THIS_VOLATILE (clobber) = 1; + new_stmt = gimple_build_assign (new_temp, clobber); + vect_finish_stmt_generation (stmt, new_stmt, gsi); + } + continue; + } + else if (TYPE_VECTOR_SUBPARTS (vectype) > nunits) + { + unsigned int k = (TYPE_VECTOR_SUBPARTS (vectype) + / TYPE_VECTOR_SUBPARTS (rtype)); + gcc_assert ((k & (k - 1)) == 0); + if ((j & (k - 1)) == 0) + vec_alloc (ret_ctor_elts, k); + if (ratype) + { + unsigned int m, o = nunits / TYPE_VECTOR_SUBPARTS (rtype); + for (m = 0; m < o; m++) + { + tree tem = build4 (ARRAY_REF, rtype, new_temp, + size_int (m), NULL_TREE, NULL_TREE); + new_stmt + = gimple_build_assign (make_ssa_name (rtype, NULL), + tem); + vect_finish_stmt_generation (stmt, new_stmt, gsi); + CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, + gimple_assign_lhs (new_stmt)); + } + tree clobber = build_constructor (ratype, NULL); + TREE_THIS_VOLATILE (clobber) = 1; + new_stmt = gimple_build_assign (new_temp, clobber); + vect_finish_stmt_generation (stmt, new_stmt, gsi); + } + else + CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp); + if ((j & (k - 1)) != k - 1) + continue; + vec_oprnd0 = build_constructor (vectype, ret_ctor_elts); + new_stmt + = gimple_build_assign (make_ssa_name (vec_dest, NULL), + vec_oprnd0); + vect_finish_stmt_generation (stmt, new_stmt, gsi); + + if ((unsigned) j == k - 1) + STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt; + else + STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt; + + prev_stmt_info = vinfo_for_stmt (new_stmt); + continue; + } + else if (ratype) + { + tree t = build_fold_addr_expr (new_temp); + t = build2 (MEM_REF, vectype, t, + build_int_cst (TREE_TYPE (t), 0)); + new_stmt + = gimple_build_assign (make_ssa_name (vec_dest, NULL), t); + vect_finish_stmt_generation (stmt, new_stmt, gsi); + tree clobber = build_constructor (ratype, NULL); + TREE_THIS_VOLATILE (clobber) = 1; + vect_finish_stmt_generation (stmt, + gimple_build_assign (new_temp, + clobber), gsi); + } + } + + if (j == 0) + STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt; + else + STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt; + + prev_stmt_info = vinfo_for_stmt (new_stmt); + } + + vargs.release (); + + /* The call in STMT might prevent it from being removed in dce. + We however cannot remove it here, due to the way the ssa name + it defines is mapped to the new definition. So just replace + rhs of the statement with something harmless. */ + + if (slp_node) + return true; + + if (scalar_dest) + { + type = TREE_TYPE (scalar_dest); + if (is_pattern_stmt_p (stmt_info)) + lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info)); + else + lhs = gimple_call_lhs (stmt); + new_stmt = gimple_build_assign (lhs, build_zero_cst (type)); + } + else + new_stmt = gimple_build_nop (); + set_vinfo_for_stmt (new_stmt, stmt_info); + set_vinfo_for_stmt (stmt, NULL); + STMT_VINFO_STMT (stmt_info) = new_stmt; + gsi_replace (gsi, new_stmt, false); + unlink_stmt_vdef (stmt); + + return true; +} + + /* Function vect_gen_widened_results_half Create a vector stmt whose code, type, number of arguments, and result @@ -5819,7 +6417,9 @@ vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node) if (STMT_VINFO_RELEVANT_P (stmt_info)) { gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt)))); - gcc_assert (STMT_VINFO_VECTYPE (stmt_info)); + gcc_assert (STMT_VINFO_VECTYPE (stmt_info) + || (is_gimple_call (stmt) + && gimple_call_lhs (stmt) == NULL_TREE)); *need_to_vectorize = true; } @@ -5827,7 +6427,8 @@ vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node) if (!bb_vinfo && (STMT_VINFO_RELEVANT_P (stmt_info) || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)) - ok = (vectorizable_conversion (stmt, NULL, NULL, NULL) + ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, NULL) + || vectorizable_conversion (stmt, NULL, NULL, NULL) || vectorizable_shift (stmt, NULL, NULL, NULL) || vectorizable_operation (stmt, NULL, NULL, NULL) || vectorizable_assignment (stmt, NULL, NULL, NULL) @@ -5839,7 +6440,8 @@ vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node) else { if (bb_vinfo) - ok = (vectorizable_conversion (stmt, NULL, NULL, node) + ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node) + || vectorizable_conversion (stmt, NULL, NULL, node) || vectorizable_shift (stmt, NULL, NULL, node) || vectorizable_operation (stmt, NULL, NULL, node) || vectorizable_assignment (stmt, NULL, NULL, node) @@ -5967,6 +6569,11 @@ vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi, stmt = gsi_stmt (*gsi); break; + case call_simd_clone_vec_info_type: + done = vectorizable_simd_clone_call (stmt, gsi, &vec_stmt, slp_node); + stmt = gsi_stmt (*gsi); + break; + case reduc_vec_info_type: done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node); gcc_assert (done); diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index 58884f85502..39d7d41c65e 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -443,6 +443,7 @@ enum stmt_vec_info_type { shift_vec_info_type, op_vec_info_type, call_vec_info_type, + call_simd_clone_vec_info_type, assignment_vec_info_type, condition_vec_info_type, reduc_vec_info_type, @@ -565,6 +566,9 @@ typedef struct _stmt_vec_info { of this stmt. */ vec<dr_p> same_align_refs; + /* Selected SIMD clone's function decl. */ + tree simd_clone_fndecl; + /* Classify the def of this stmt. */ enum vect_def_type def_type; @@ -633,6 +637,7 @@ typedef struct _stmt_vec_info { #define STMT_VINFO_RELATED_STMT(S) (S)->related_stmt #define STMT_VINFO_PATTERN_DEF_SEQ(S) (S)->pattern_def_seq #define STMT_VINFO_SAME_ALIGN_REFS(S) (S)->same_align_refs +#define STMT_VINFO_SIMD_CLONE_FNDECL(S) (S)->simd_clone_fndecl #define STMT_VINFO_DEF_TYPE(S) (S)->def_type #define STMT_VINFO_GROUP_FIRST_ELEMENT(S) (S)->first_element #define STMT_VINFO_GROUP_NEXT_ELEMENT(S) (S)->next_element diff --git a/gcc/tree.h b/gcc/tree.h index be57f651fd6..88c8d56bf3a 100644 --- a/gcc/tree.h +++ b/gcc/tree.h @@ -1344,6 +1344,10 @@ extern void protected_set_expr_location (tree, location_t); #define OMP_CLAUSE_LINEAR_NO_COPYOUT(NODE) \ TREE_PRIVATE (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_LINEAR)) +/* True if a LINEAR clause has a stride that is variable. */ +#define OMP_CLAUSE_LINEAR_VARIABLE_STRIDE(NODE) \ + TREE_PROTECTED (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_LINEAR)) + #define OMP_CLAUSE_LINEAR_STEP(NODE) \ OMP_CLAUSE_OPERAND (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_LINEAR), 1) |