author    | Lorry Tar Creator <lorry-tar-importer@lorry> | 2017-05-02 14:43:35 +0000
committer | Lorry Tar Creator <lorry-tar-importer@lorry> | 2017-05-02 14:43:35 +0000
commit    | 34efdaf078b01a7387007c4e6bde6db86384c4b7
tree      | d503eaf41d085669d1481bb46ec038bc866fece6 /gcc/tree-vectorizer.c
parent    | f733cf303bcdc952c92b81dd62199a40a1f555ec
gcc-7.1.0
Diffstat (limited to 'gcc/tree-vectorizer.c')
-rw-r--r-- | gcc/tree-vectorizer.c | 305
1 files changed, 276 insertions, 29 deletions
diff --git a/gcc/tree-vectorizer.c b/gcc/tree-vectorizer.c
index 2b25b4503a..f928dec3d9 100644
--- a/gcc/tree-vectorizer.c
+++ b/gcc/tree-vectorizer.c
@@ -1,5 +1,5 @@
 /* Vectorizer
-   Copyright (C) 2003-2016 Free Software Foundation, Inc.
+   Copyright (C) 2003-2017 Free Software Foundation, Inc.
    Contributed by Dorit Naishlos <dorit@il.ibm.com>
 
 This file is part of GCC.
@@ -69,6 +69,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "gimple-iterator.h"
 #include "gimple-walk.h"
 #include "tree-ssa-loop-manip.h"
+#include "tree-ssa-loop-niter.h"
 #include "tree-cfg.h"
 #include "cfgloop.h"
 #include "tree-vectorizer.h"
@@ -204,6 +205,10 @@ adjust_simduid_builtins (hash_table<simduid_to_vf> *htab)
       gcc_assert (TREE_CODE (arg) == SSA_NAME);
       simduid_to_vf *p = NULL, data;
       data.simduid = DECL_UID (SSA_NAME_VAR (arg));
+      /* Need to nullify loop safelen field since it's value is not
+         valid after transformation.  */
+      if (bb->loop_father && bb->loop_father->safelen > 0)
+        bb->loop_father->safelen = 0;
       if (htab)
         {
           p = htab->find (&data);
@@ -364,6 +369,20 @@ vect_destroy_datarefs (vec_info *vinfo)
   free_data_refs (vinfo->datarefs);
 }
 
+/* A helper function to free scev and LOOP niter information, as well as
+   clear loop constraint LOOP_C_FINITE.  */
+
+void
+vect_free_loop_info_assumptions (struct loop *loop)
+{
+  scev_reset_htab ();
+  /* We need to explicitly reset upper bound information since they are
+     used even after free_numbers_of_iterations_estimates_loop.  */
+  loop->any_upper_bound = false;
+  loop->any_likely_upper_bound = false;
+  free_numbers_of_iterations_estimates_loop (loop);
+  loop_constraint_clear (loop, LOOP_C_FINITE);
+}
 
 /* Return whether STMT is inside the region we try to vectorize.  */
 
@@ -417,9 +436,7 @@ vect_loop_vectorized_call (struct loop *loop)
   if (!gsi_end_p (gsi))
     {
       g = gsi_stmt (gsi);
-      if (is_gimple_call (g)
-          && gimple_call_internal_p (g)
-          && gimple_call_internal_fn (g) == IFN_LOOP_VECTORIZED
+      if (gimple_call_internal_p (g, IFN_LOOP_VECTORIZED)
           && (tree_to_shwi (gimple_call_arg (g, 0)) == loop->num
               || tree_to_shwi (gimple_call_arg (g, 1)) == loop->num))
         return g;
@@ -448,6 +465,7 @@ fold_loop_vectorized_call (gimple *g, tree value)
           update_stmt (use_stmt);
         }
     }
+
 /* Set the uids of all the statements in basic blocks inside loop
    represented by LOOP_VINFO.  LOOP_VECTORIZED_CALL is the internal
    call guarding the loop which has been if converted.  */
@@ -460,9 +478,22 @@ set_uid_loop_bbs (loop_vec_info loop_vinfo, gimple *loop_vectorized_call)
   struct loop *scalar_loop = get_loop (cfun, tree_to_shwi (arg));
 
   LOOP_VINFO_SCALAR_LOOP (loop_vinfo) = scalar_loop;
-  gcc_checking_assert (vect_loop_vectorized_call
-                       (LOOP_VINFO_SCALAR_LOOP (loop_vinfo))
+  gcc_checking_assert (vect_loop_vectorized_call (scalar_loop)
                        == loop_vectorized_call);
+  /* If we are going to vectorize outer loop, prevent vectorization
+     of the inner loop in the scalar loop - either the scalar loop is
+     thrown away, so it is a wasted work, or is used only for
+     a few iterations.  */
+  if (scalar_loop->inner)
+    {
+      gimple *g = vect_loop_vectorized_call (scalar_loop->inner);
+      if (g)
+        {
+          arg = gimple_call_arg (g, 0);
+          get_loop (cfun, tree_to_shwi (arg))->dont_vectorize = true;
+          fold_loop_vectorized_call (g, boolean_false_node);
+        }
+    }
   bbs = get_loop_body (scalar_loop);
   for (i = 0; i < scalar_loop->num_nodes; i++)
     {
@@ -497,6 +528,7 @@ vectorize_loops (void)
   hash_table<simd_array_to_simduid> *simd_array_to_simduid_htab = NULL;
   bool any_ifcvt_loops = false;
   unsigned ret = 0;
+  struct loop *new_loop;
 
   vect_loops_num = number_of_loops (cfun);
 
@@ -516,12 +548,59 @@ vectorize_loops (void)
      only over initial loops skipping newly generated ones.  */
   FOR_EACH_LOOP (loop, 0)
     if (loop->dont_vectorize)
-      any_ifcvt_loops = true;
-    else if ((flag_tree_loop_vectorize
-              && optimize_loop_nest_for_speed_p (loop))
-             || loop->force_vectorize)
       {
-        loop_vec_info loop_vinfo;
+        any_ifcvt_loops = true;
+        /* If-conversion sometimes versions both the outer loop
+           (for the case when outer loop vectorization might be
+           desirable) as well as the inner loop in the scalar version
+           of the loop.  So we have:
+            if (LOOP_VECTORIZED (1, 3))
+              {
+                loop1
+                  loop2
+              }
+            else
+              loop3 (copy of loop1)
+                if (LOOP_VECTORIZED (4, 5))
+                  loop4 (copy of loop2)
+                else
+                  loop5 (copy of loop4)
+           If FOR_EACH_LOOP gives us loop3 first (which has
+           dont_vectorize set), make sure to process loop1 before loop4;
+           so that we can prevent vectorization of loop4 if loop1
+           is successfully vectorized.  */
+        if (loop->inner)
+          {
+            gimple *loop_vectorized_call
+              = vect_loop_vectorized_call (loop);
+            if (loop_vectorized_call
+                && vect_loop_vectorized_call (loop->inner))
+              {
+                tree arg = gimple_call_arg (loop_vectorized_call, 0);
+                struct loop *vector_loop
+                  = get_loop (cfun, tree_to_shwi (arg));
+                if (vector_loop && vector_loop != loop)
+                  {
+                    loop = vector_loop;
+                    /* Make sure we don't vectorize it twice.  */
+                    loop->dont_vectorize = true;
+                    goto try_vectorize;
+                  }
+              }
+          }
+      }
+    else
+      {
+        loop_vec_info loop_vinfo, orig_loop_vinfo;
+        gimple *loop_vectorized_call;
+       try_vectorize:
+        if (!((flag_tree_loop_vectorize
+               && optimize_loop_nest_for_speed_p (loop))
+              || loop->force_vectorize))
+          continue;
+        orig_loop_vinfo = NULL;
+        loop_vectorized_call = vect_loop_vectorized_call (loop);
+       vectorize_epilogue:
        vect_location = find_loop_location (loop);
        if (LOCATION_LOCUS (vect_location) != UNKNOWN_LOCATION
            && dump_enabled_p ())
@@ -529,11 +608,61 @@ vectorize_loops (void)
                            LOCATION_FILE (vect_location),
                            LOCATION_LINE (vect_location));
 
-        loop_vinfo = vect_analyze_loop (loop);
+        loop_vinfo = vect_analyze_loop (loop, orig_loop_vinfo);
        loop->aux = loop_vinfo;
 
        if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo))
-          continue;
+          {
+            /* Free existing information if loop is analyzed with some
+               assumptions.  */
+            if (loop_constraint_set_p (loop, LOOP_C_FINITE))
+              vect_free_loop_info_assumptions (loop);
+
+            /* If we applied if-conversion then try to vectorize the
+               BB of innermost loops.
+               ???  Ideally BB vectorization would learn to vectorize
+               control flow by applying if-conversion on-the-fly, the
+               following retains the if-converted loop body even when
+               only non-if-converted parts took part in BB vectorization.  */
+            if (flag_tree_slp_vectorize != 0
+                && loop_vectorized_call
+                && ! loop->inner)
+              {
+                basic_block bb = loop->header;
+                bool has_mask_load_store = false;
+                for (gimple_stmt_iterator gsi = gsi_start_bb (bb);
+                     !gsi_end_p (gsi); gsi_next (&gsi))
+                  {
+                    gimple *stmt = gsi_stmt (gsi);
+                    if (is_gimple_call (stmt)
+                        && gimple_call_internal_p (stmt)
+                        && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
+                            || gimple_call_internal_fn (stmt) == IFN_MASK_STORE))
+                      {
+                        has_mask_load_store = true;
+                        break;
+                      }
+                    gimple_set_uid (stmt, -1);
+                    gimple_set_visited (stmt, false);
+                  }
+                if (! has_mask_load_store && vect_slp_bb (bb))
+                  {
+                    dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
+                                     "basic block vectorized\n");
+                    fold_loop_vectorized_call (loop_vectorized_call,
+                                               boolean_true_node);
+                    loop_vectorized_call = NULL;
+                    ret |= TODO_cleanup_cfg;
+                  }
+              }
+            /* If outer loop vectorization fails for LOOP_VECTORIZED guarded
+               loop, don't vectorize its inner loop; we'll attempt to
+               vectorize LOOP_VECTORIZED guarded inner loop of the scalar
+               loop version.  */
+            if (loop_vectorized_call && loop->inner)
+              loop->inner->dont_vectorize = true;
+            continue;
+          }
 
        if (!dbg_cnt (vect_loop))
          {
@@ -541,17 +670,21 @@ vectorize_loops (void)
               debug counter.  Set any_ifcvt_loops to visit
               them at finalization.  */
            any_ifcvt_loops = true;
+            /* Free existing information if loop is analyzed with some
+               assumptions.  */
+            if (loop_constraint_set_p (loop, LOOP_C_FINITE))
+              vect_free_loop_info_assumptions (loop);
+
            break;
          }
 
-        gimple *loop_vectorized_call = vect_loop_vectorized_call (loop);
        if (loop_vectorized_call)
          set_uid_loop_bbs (loop_vinfo, loop_vectorized_call);
        if (LOCATION_LOCUS (vect_location) != UNKNOWN_LOCATION
            && dump_enabled_p ())
          dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
                           "loop vectorized\n");
-        vect_transform_loop (loop_vinfo);
+        new_loop = vect_transform_loop (loop_vinfo);
        num_vectorized_loops++;
        /* Now that the loop has been vectorized, allow it to be unrolled
           etc.  */
@@ -571,8 +704,18 @@ vectorize_loops (void)
        if (loop_vectorized_call)
          {
            fold_loop_vectorized_call (loop_vectorized_call, boolean_true_node);
+            loop_vectorized_call = NULL;
            ret |= TODO_cleanup_cfg;
          }
+
+        if (new_loop)
+          {
+            /* Epilogue of vectorized loop must be vectorized too.  */
+            vect_loops_num = number_of_loops (cfun);
+            loop = new_loop;
+            orig_loop_vinfo = loop_vinfo;  /* To pass vect_analyze_loop.  */
+            goto vectorize_epilogue;
+          }
      }
 
   vect_location = UNKNOWN_LOCATION;
@@ -794,38 +937,142 @@ make_pass_slp_vectorize (gcc::context *ctxt)
      This should involve global alignment analysis and in the future also
      array padding.  */
 
+static unsigned get_vec_alignment_for_type (tree);
+static hash_map<tree, unsigned> *type_align_map;
+
+/* Return alignment of array's vector type corresponding to scalar type.
+   0 if no vector type exists.  */
+static unsigned
+get_vec_alignment_for_array_type (tree type)
+{
+  gcc_assert (TREE_CODE (type) == ARRAY_TYPE);
+
+  tree vectype = get_vectype_for_scalar_type (strip_array_types (type));
+  if (!vectype
+      || !TYPE_SIZE (type)
+      || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
+      || tree_int_cst_lt (TYPE_SIZE (type), TYPE_SIZE (vectype)))
+    return 0;
+
+  return TYPE_ALIGN (vectype);
+}
+
+/* Return alignment of field having maximum alignment of vector type
+   corresponding to it's scalar type.  For now, we only consider fields whose
+   offset is a multiple of it's vector alignment.
+   0 if no suitable field is found.  */
+static unsigned
+get_vec_alignment_for_record_type (tree type)
+{
+  gcc_assert (TREE_CODE (type) == RECORD_TYPE);
+
+  unsigned max_align = 0, alignment;
+  HOST_WIDE_INT offset;
+  tree offset_tree;
+
+  if (TYPE_PACKED (type))
+    return 0;
+
+  unsigned *slot = type_align_map->get (type);
+  if (slot)
+    return *slot;
+
+  for (tree field = first_field (type);
+       field != NULL_TREE;
+       field = DECL_CHAIN (field))
+    {
+      /* Skip if not FIELD_DECL or if alignment is set by user.  */
+      if (TREE_CODE (field) != FIELD_DECL
+          || DECL_USER_ALIGN (field)
+          || DECL_ARTIFICIAL (field))
+        continue;
+
+      /* We don't need to process the type further if offset is variable,
+         since the offsets of remaining members will also be variable.  */
+      if (TREE_CODE (DECL_FIELD_OFFSET (field)) != INTEGER_CST
+          || TREE_CODE (DECL_FIELD_BIT_OFFSET (field)) != INTEGER_CST)
+        break;
+
+      /* Similarly stop processing the type if offset_tree
+         does not fit in unsigned HOST_WIDE_INT.  */
+      offset_tree = bit_position (field);
+      if (!tree_fits_uhwi_p (offset_tree))
+        break;
+
+      offset = tree_to_uhwi (offset_tree);
+      alignment = get_vec_alignment_for_type (TREE_TYPE (field));
+
+      /* Get maximum alignment of vectorized field/array among those members
+         whose offset is multiple of the vector alignment.  */
+      if (alignment
+          && (offset % alignment == 0)
+          && (alignment > max_align))
+        max_align = alignment;
+    }
+
+  type_align_map->put (type, max_align);
+  return max_align;
+}
+
+/* Return alignment of vector type corresponding to decl's scalar type
+   or 0 if it doesn't exist or the vector alignment is lesser than
+   decl's alignment.  */
+static unsigned
+get_vec_alignment_for_type (tree type)
+{
+  if (type == NULL_TREE)
+    return 0;
+
+  gcc_assert (TYPE_P (type));
+
+  static unsigned alignment = 0;
+  switch (TREE_CODE (type))
+    {
+      case ARRAY_TYPE:
+        alignment = get_vec_alignment_for_array_type (type);
+        break;
+      case RECORD_TYPE:
+        alignment = get_vec_alignment_for_record_type (type);
+        break;
+      default:
+        alignment = 0;
+        break;
+    }
+
+  return (alignment > TYPE_ALIGN (type)) ? alignment : 0;
+}
+
+/* Entry point to increase_alignment pass.  */
 static unsigned int
 increase_alignment (void)
 {
   varpool_node *vnode;
 
   vect_location = UNKNOWN_LOCATION;
+  type_align_map = new hash_map<tree, unsigned>;
 
   /* Increase the alignment of all global arrays for vectorization.  */
   FOR_EACH_DEFINED_VARIABLE (vnode)
     {
-      tree vectype, decl = vnode->decl;
-      tree t;
+      tree decl = vnode->decl;
      unsigned int alignment;
 
-      t = TREE_TYPE (decl);
-      if (TREE_CODE (t) != ARRAY_TYPE)
-        continue;
-      vectype = get_vectype_for_scalar_type (strip_array_types (t));
-      if (!vectype)
-        continue;
-      alignment = TYPE_ALIGN (vectype);
-      if (DECL_ALIGN (decl) >= alignment)
-        continue;
-
-      if (vect_can_force_dr_alignment_p (decl, alignment))
+      if ((decl_in_symtab_p (decl)
+           && !symtab_node::get (decl)->can_increase_alignment_p ())
+          || DECL_USER_ALIGN (decl) || DECL_ARTIFICIAL (decl))
+        continue;
+
+      alignment = get_vec_alignment_for_type (TREE_TYPE (decl));
+      if (alignment && vect_can_force_dr_alignment_p (decl, alignment))
        {
-          vnode->increase_alignment (TYPE_ALIGN (vectype));
+          vnode->increase_alignment (alignment);
          dump_printf (MSG_NOTE, "Increasing alignment of decl: ");
          dump_generic_expr (MSG_NOTE, TDF_SLIM, decl);
          dump_printf (MSG_NOTE, "\n");
        }
     }
 
+  delete type_align_map;
   return 0;
 }
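
Note on the increase_alignment part of the change: the pass previously considered only global variables whose type was itself an ARRAY_TYPE; with get_vec_alignment_for_record_type it now also walks the FIELD_DECLs of a RECORD_TYPE and picks the largest vector alignment among fields whose offset is a multiple of that alignment. A minimal C sketch of the kind of declaration this affects follows; the struct, the names, and the 16-byte V4SF figure are illustrative assumptions for a target with 128-bit vectors, not part of the patch:

/* Hypothetical translation unit; names are illustrative only.  */
struct samples
{
  float data[1024];   /* field at offset 0 (a multiple of the vector
                         alignment); its vector type (e.g. V4SF,
                         16 bytes) supplies the candidate alignment  */
  int count;          /* scalar member, contributes nothing          */
};

struct samples g_samples;   /* global definition the pass may re-align */

void
scale (float f)
{
  for (int i = 0; i < 1024; i++)
    g_samples.data[i] *= f;   /* vectorizable loop that can then use
                                 aligned vector loads and stores      */
}

Before this change g_samples would have kept its default alignment because its type is a RECORD_TYPE, not an ARRAY_TYPE; whether the alignment is actually raised still depends on vect_can_force_dr_alignment_p and on the declaration not being user-aligned or artificial.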