summaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
Diffstat (limited to 'gcc')
-rw-r--r--gcc/ChangeLog28
-rw-r--r--gcc/Makefile.in1
-rw-r--r--gcc/common.opt4
-rw-r--r--gcc/doc/invoke.texi36
-rw-r--r--gcc/gimple-loop-versioning.cc1758
-rw-r--r--gcc/opts.c1
-rw-r--r--gcc/params.def13
-rw-r--r--gcc/passes.def1
-rw-r--r--gcc/testsuite/ChangeLog22
-rw-r--r--gcc/testsuite/gcc.dg/loop-versioning-1.c92
-rw-r--r--gcc/testsuite/gcc.dg/loop-versioning-10.c52
-rw-r--r--gcc/testsuite/gcc.dg/loop-versioning-11.c29
-rw-r--r--gcc/testsuite/gcc.dg/loop-versioning-12.c149
-rw-r--r--gcc/testsuite/gcc.dg/loop-versioning-13.c109
-rw-r--r--gcc/testsuite/gcc.dg/loop-versioning-14.c149
-rw-r--r--gcc/testsuite/gcc.dg/loop-versioning-2.c73
-rw-r--r--gcc/testsuite/gcc.dg/loop-versioning-3.c24
-rw-r--r--gcc/testsuite/gcc.dg/loop-versioning-4.c39
-rw-r--r--gcc/testsuite/gcc.dg/loop-versioning-5.c17
-rw-r--r--gcc/testsuite/gcc.dg/loop-versioning-6.c31
-rw-r--r--gcc/testsuite/gcc.dg/loop-versioning-7.c32
-rw-r--r--gcc/testsuite/gcc.dg/loop-versioning-8.c43
-rw-r--r--gcc/testsuite/gcc.dg/loop-versioning-9.c48
-rw-r--r--gcc/testsuite/gcc.dg/vect/slp-43.c2
-rw-r--r--gcc/testsuite/gcc.dg/vect/slp-45.c2
-rw-r--r--gcc/testsuite/gfortran.dg/loop_versioning_1.f9028
-rw-r--r--gcc/testsuite/gfortran.dg/loop_versioning_2.f9039
-rw-r--r--gcc/testsuite/gfortran.dg/loop_versioning_3.f9030
-rw-r--r--gcc/testsuite/gfortran.dg/loop_versioning_4.f9095
-rw-r--r--gcc/testsuite/gfortran.dg/loop_versioning_5.f9057
-rw-r--r--gcc/testsuite/gfortran.dg/loop_versioning_6.f9093
-rw-r--r--gcc/testsuite/gfortran.dg/loop_versioning_7.f9067
-rw-r--r--gcc/testsuite/gfortran.dg/loop_versioning_8.f9013
-rw-r--r--gcc/timevar.def1
-rw-r--r--gcc/tree-pass.h1
-rw-r--r--gcc/tree-ssa-propagate.c18
-rw-r--r--gcc/tree-ssa-propagate.h2
-rw-r--r--gcc/tree-vrp.c7
-rw-r--r--gcc/tree-vrp.h10
39 files changed, 3204 insertions, 12 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 83cacbfc1b1..83900c5b674 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,31 @@
+2018-12-17 Richard Sandiford <richard.sandiford@arm.com>
+
+ * doc/invoke.texi (-fversion-loops-for-strides): Document
+ (loop-versioning-group-size, loop-versioning-max-inner-insns)
+ (loop-versioning-max-outer-insns): Document new --params.
+ * Makefile.in (OBJS): Add gimple-loop-versioning.o.
+ * common.opt (fversion-loops-for-strides): New option.
+ * opts.c (default_options_table): Enable fversion-loops-for-strides
+ at -O3.
+ * params.def (PARAM_LOOP_VERSIONING_GROUP_SIZE)
+ (PARAM_LOOP_VERSIONING_MAX_INNER_INSNS)
+ (PARAM_LOOP_VERSIONING_MAX_OUTER_INSNS): New parameters.
+ * passes.def: Add pass_loop_versioning.
+ * timevar.def (TV_LOOP_VERSIONING): New time variable.
+ * tree-ssa-propagate.h
+ (substitute_and_fold_engine::substitute_and_fold): Add an optional
+ block parameter.
+ * tree-ssa-propagate.c
+ (substitute_and_fold_engine::substitute_and_fold): Likewise.
+ When passed, only walk blocks dominated by that block.
+ * tree-vrp.h (range_includes_p): Declare.
+ (range_includes_zero_p): Turn into an inline wrapper around
+ range_includes_p.
+ * tree-vrp.c (range_includes_p): New function, generalizing...
+ (range_includes_zero_p): ...this.
+ * tree-pass.h (make_pass_loop_versioning): Declare.
+ * gimple-loop-versioning.cc: New file.
+
2018-12-15 Jan Hubicka <hubicka@ucw.cz>
* ipa-fnsummary.c (remap_edge_change_prob): Do not ICE when changes
diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index 881487b0839..7960cace16a 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -1320,6 +1320,7 @@ OBJS = \
gimple-laddress.o \
gimple-loop-interchange.o \
gimple-loop-jam.o \
+ gimple-loop-versioning.o \
gimple-low.o \
gimple-pretty-print.o \
gimple-ssa-backprop.o \
diff --git a/gcc/common.opt b/gcc/common.opt
index 08bffdf2c2d..45d7f6189e5 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -2775,6 +2775,10 @@ fsplit-loops
Common Report Var(flag_split_loops) Optimization
Perform loop splitting.
+fversion-loops-for-strides
+Common Report Var(flag_version_loops_for_strides) Optimization
+Version loops based on whether indices have a stride of one.
+
funwind-tables
Common Report Var(flag_unwind_tables) Optimization
Just generate unwind tables for exception handling.
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 9a14170d084..ac2ee59d92c 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -8220,7 +8220,8 @@ by @option{-O2} and also turns on the following optimization flags:
-ftree-partial-pre @gol
-ftree-slp-vectorize @gol
-funswitch-loops @gol
--fvect-cost-model}
+-fvect-cost-model @gol
+-fversion-loops-for-strides}
@item -O0
@opindex O0
@@ -10772,6 +10773,30 @@ of the loop on both branches (modified according to result of the condition).
Enabled by @option{-fprofile-use} and @option{-fauto-profile}.
+@item -fversion-loops-for-strides
+@opindex fversion-loops-for-strides
+If a loop iterates over an array with a variable stride, create another
+version of the loop that assumes the stride is always one. For example:
+
+@smallexample
+for (int i = 0; i < n; ++i)
+ x[i * stride] = @dots{};
+@end smallexample
+
+becomes:
+
+@smallexample
+if (stride == 1)
+ for (int i = 0; i < n; ++i)
+ x[i] = @dots{};
+else
+ for (int i = 0; i < n; ++i)
+ x[i * stride] = @dots{};
+@end smallexample
+
+This is particularly useful for assumed-shape arrays in Fortran where
+(for example) it allows better vectorization assuming contiguous accesses.
+
@item -ffunction-sections
@itemx -fdata-sections
@opindex ffunction-sections
@@ -11981,6 +12006,15 @@ Hardware autoprefetcher scheduler model control flag.
Number of lookahead cycles the model looks into; at '
' only enable instruction sorting heuristic.
+@item loop-versioning-max-inner-insns
+The maximum number of instructions that an inner loop can have
+before the loop versioning pass considers it too big to copy.
+
+@item loop-versioning-max-outer-insns
+The maximum number of instructions that an outer loop can have
+before the loop versioning pass considers it too big to copy,
+discounting any instructions in inner loops that directly benefit
+from versioning.
@end table
@end table
diff --git a/gcc/gimple-loop-versioning.cc b/gcc/gimple-loop-versioning.cc
new file mode 100644
index 00000000000..887b5e77d6a
--- /dev/null
+++ b/gcc/gimple-loop-versioning.cc
@@ -0,0 +1,1758 @@
+/* Loop versioning pass.
+ Copyright (C) 2018 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3, or (at your option) any
+later version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "backend.h"
+#include "tree.h"
+#include "gimple.h"
+#include "gimple-iterator.h"
+#include "tree-pass.h"
+#include "gimplify-me.h"
+#include "cfgloop.h"
+#include "tree-ssa-loop.h"
+#include "ssa.h"
+#include "tree-scalar-evolution.h"
+#include "tree-chrec.h"
+#include "tree-ssa-loop-ivopts.h"
+#include "fold-const.h"
+#include "tree-ssa-propagate.h"
+#include "tree-inline.h"
+#include "domwalk.h"
+#include "alloc-pool.h"
+#include "vr-values.h"
+#include "gimple-ssa-evrp-analyze.h"
+#include "tree-vectorizer.h"
+#include "omp-general.h"
+#include "predict.h"
+#include "tree-into-ssa.h"
+#include "params.h"
+
+namespace {
+
+/* This pass looks for loops that could be simplified if certain loop
+ invariant conditions were true. It is effectively a form of loop
+ splitting in which the pass produces the split conditions itself,
+ instead of using ones that are already present in the IL.
+
+ Versioning for when strides are 1
+ ---------------------------------
+
+ At the moment the only thing the pass looks for are memory references
+ like:
+
+ for (auto i : ...)
+ ...x[i * stride]...
+
+ It considers changing such loops to:
+
+ if (stride == 1)
+ for (auto i : ...) [A]
+ ...x[i]...
+ else
+ for (auto i : ...) [B]
+ ...x[i * stride]...
+
+ This can have several benefits:
+
+ (1) [A] is often easier or cheaper to vectorize than [B].
+
+ (2) The scalar code in [A] is simpler than the scalar code in [B]
+ (if the loops cannot be vectorized or need an epilogue loop).
+
+ (3) We might recognize [A] as a pattern, such as a memcpy or memset.
+
+ (4) [A] has simpler address evolutions, which can help other passes
+ like loop interchange.
+
+ The optimization is particularly useful for assumed-shape arrays in
+ Fortran, where the stride of the innermost dimension depends on the
+ array descriptor but is often equal to 1 in practice. For example:
+
+ subroutine f1(x)
+ real :: x(:)
+ x(:) = 100
+ end subroutine f1
+
+ generates the equivalent of:
+
+ raw_stride = *x.dim[0].stride;
+ stride = raw_stride != 0 ? raw_stride : 1;
+ x_base = *x.data;
+ ...
+ tmp1 = stride * S;
+ tmp2 = tmp1 - stride;
+ *x_base[tmp2] = 1.0e+2;
+
+ but in the common case that stride == 1, the last three statements
+ simplify to:
+
+ tmp3 = S + -1;
+ *x_base[tmp3] = 1.0e+2;
+
+ The optimization is in principle very simple. The difficult parts are:
+
+ (a) deciding which parts of a general address calculation correspond
+ to the inner dimension of an array, since this usually isn't explicit
+ in the IL, and for C often isn't even explicit in the source code
+
+ (b) estimating when the transformation is worthwhile
+
+ Structure
+ ---------
+
+ The pass has four phases:
+
+ (1) Walk through the statements looking for and recording potential
+ versioning opportunities. Stop if there are none.
+
+ (2) Use context-sensitive range information to see whether any versioning
+ conditions are impossible in practice. Remove them if so, and stop
+ if no opportunities remain.
+
+ (We do this only after (1) to keep compile time down when no
+ versioning opportunities exist.)
+
+ (3) Apply the cost model. Decide which versioning opportunities are
+ worthwhile and at which nesting level they should be applied.
+
+ (4) Attempt to version all the loops selected by (3), so that:
+
+ for (...)
+ ...
+
+ becomes:
+
+ if (!cond)
+ for (...) // Original loop
+ ...
+ else
+ for (...) // New loop
+ ...
+
+ Use the version condition COND to simplify the new loop. */
+
+/* Enumerates the likelihood that a particular value indexes the inner
+ dimension of an array. */
+enum inner_likelihood {
+ INNER_UNLIKELY,
+ INNER_DONT_KNOW,
+ INNER_LIKELY
+};
+
+/* Information about one term of an address_info. */
+struct address_term_info
+{
+ /* The value of the term is EXPR * MULTIPLIER. */
+ tree expr;
+ unsigned HOST_WIDE_INT multiplier;
+
+ /* The stride applied by EXPR in each iteration of some unrecorded loop,
+ or null if no stride has been identified. */
+ tree stride;
+
+ /* Enumerates the likelihood that EXPR indexes the inner dimension
+ of an array. */
+ enum inner_likelihood inner_likelihood;
+
+ /* True if STRIDE == 1 is a versioning opportunity when considered
+ in isolation. */
+ bool versioning_opportunity_p;
+};
+
+/* Information about an address calculation, and the range of constant
+ offsets applied to it. */
+struct address_info
+{
+ static const unsigned int MAX_TERMS = 8;
+
+ /* One statement that calculates the address. If multiple statements
+ share the same address, we only record the first. */
+ gimple *stmt;
+
+ /* The loop containing STMT (cached for convenience). If multiple
+ statements share the same address, they all belong to this loop. */
+ struct loop *loop;
+
+ /* A decomposition of the calculation into a sum of terms plus an
+ optional base. When BASE is provided, it is never an SSA name.
+ Once initialization is complete, all members of TERMs are SSA names. */
+ tree base;
+ auto_vec<address_term_info, MAX_TERMS> terms;
+
+ /* All bytes accessed from the address fall in the offset range
+ [MIN_OFFSET, MAX_OFFSET). */
+ HOST_WIDE_INT min_offset, max_offset;
+};
+
+/* Stores addresses based on their base and terms (ignoring the offsets). */
+struct address_info_hasher : nofree_ptr_hash <address_info>
+{
+ static hashval_t hash (const address_info *);
+ static bool equal (const address_info *, const address_info *);
+};
+
+/* Information about the versioning we'd like to apply to a loop. */
+struct loop_info
+{
+ bool worth_versioning_p () const;
+
+ /* True if we've decided not to version this loop. The remaining
+ fields are meaningless if so. */
+ bool rejected_p;
+
+ /* True if at least one subloop of this loop benefits from versioning. */
+ bool subloops_benefit_p;
+
+ /* An estimate of the total number of instructions in the loop,
+ excluding those in subloops that benefit from versioning. */
+ unsigned int num_insns;
+
+ /* The outermost loop that can handle all the version checks
+ described below. */
+ struct loop *outermost;
+
+ /* The first entry in the list of blocks that belong to this loop
+ (and not to subloops). m_next_block_in_loop provides the chain
+ pointers for the list. */
+ basic_block block_list;
+
+ /* We'd like to version the loop for the case in which these SSA names
+ (keyed off their SSA_NAME_VERSION) are all equal to 1 at runtime. */
+ bitmap_head unity_names;
+
+ /* If versioning succeeds, this points the version of the loop that
+ assumes the version conditions holds. */
+ struct loop *optimized_loop;
+};
+
+/* The main pass structure. */
+class loop_versioning
+{
+public:
+ loop_versioning (function *);
+ ~loop_versioning ();
+ unsigned int run ();
+
+private:
+ /* Used to walk the dominator tree to find loop versioning conditions
+ that are always false. */
+ class lv_dom_walker : public dom_walker
+ {
+ public:
+ lv_dom_walker (loop_versioning &);
+
+ edge before_dom_children (basic_block) FINAL OVERRIDE;
+ void after_dom_children (basic_block) FINAL OVERRIDE;
+
+ private:
+ /* The parent pass. */
+ loop_versioning &m_lv;
+
+ /* Used to build context-dependent range information. */
+ evrp_range_analyzer m_range_analyzer;
+ };
+
+ /* Used to simplify statements based on conditions that are established
+ by the version checks. */
+ class name_prop : public substitute_and_fold_engine
+ {
+ public:
+ name_prop (loop_info &li) : m_li (li) {}
+ tree get_value (tree) FINAL OVERRIDE;
+
+ private:
+ /* Information about the versioning we've performed on the loop. */
+ loop_info &m_li;
+ };
+
+ loop_info &get_loop_info (struct loop *loop) { return m_loops[loop->num]; }
+
+ unsigned int max_insns_for_loop (struct loop *);
+ bool expensive_stmt_p (gimple *);
+
+ void version_for_unity (gimple *, tree);
+ bool acceptable_multiplier_p (tree, unsigned HOST_WIDE_INT,
+ unsigned HOST_WIDE_INT * = 0);
+ bool acceptable_type_p (tree, unsigned HOST_WIDE_INT *);
+ bool multiply_term_by (address_term_info &, tree);
+ inner_likelihood get_inner_likelihood (tree, unsigned HOST_WIDE_INT);
+ void analyze_stride (address_info &, address_term_info &,
+ tree, struct loop *);
+ bool find_per_loop_multiplication (address_info &, address_term_info &);
+ void analyze_term_using_scevs (address_info &, address_term_info &);
+ void analyze_address_fragment (address_info &);
+ void record_address_fragment (gimple *, unsigned HOST_WIDE_INT,
+ tree, unsigned HOST_WIDE_INT, HOST_WIDE_INT);
+ void analyze_expr (gimple *, tree);
+ bool analyze_block (basic_block);
+ bool analyze_blocks ();
+
+ void prune_loop_conditions (struct loop *, vr_values *);
+ bool prune_conditions ();
+
+ void merge_loop_info (struct loop *, struct loop *);
+ void add_loop_to_queue (struct loop *);
+ bool decide_whether_loop_is_versionable (struct loop *);
+ bool make_versioning_decisions ();
+
+ bool version_loop (struct loop *);
+ void implement_versioning_decisions ();
+
+ /* The function we're optimizing. */
+ function *m_fn;
+
+ /* The obstack to use for all pass-specific bitmaps. */
+ bitmap_obstack m_bitmap_obstack;
+
+ /* An obstack to use for general allocation. */
+ obstack m_obstack;
+
+ /* The number of loops in the function. */
+ unsigned int m_nloops;
+
+ /* The total number of loop version conditions we've found. */
+ unsigned int m_num_conditions;
+
+ /* Assume that an address fragment of the form i * stride * scale
+ (for variable stride and constant scale) will not benefit from
+ versioning for stride == 1 when scale is greater than this value. */
+ unsigned HOST_WIDE_INT m_maximum_scale;
+
+ /* Information about each loop. */
+ auto_vec<loop_info> m_loops;
+
+ /* Used to form a linked list of blocks that belong to a loop,
+ started by loop_info::block_list. */
+ auto_vec<basic_block> m_next_block_in_loop;
+
+ /* The list of loops that we've decided to version. */
+ auto_vec<struct loop *> m_loops_to_version;
+
+ /* A table of addresses in the current loop, keyed off their values
+ but not their offsets. */
+ hash_table <address_info_hasher> m_address_table;
+
+ /* A list of all addresses in M_ADDRESS_TABLE, in a predictable order. */
+ auto_vec <address_info *, 32> m_address_list;
+};
+
+/* If EXPR is an SSA name and not a default definition, return the
+ defining statement, otherwise return null. */
+
+static gimple *
+maybe_get_stmt (tree expr)
+{
+ if (TREE_CODE (expr) == SSA_NAME && !SSA_NAME_IS_DEFAULT_DEF (expr))
+ return SSA_NAME_DEF_STMT (expr);
+ return NULL;
+}
+
+/* Like maybe_get_stmt, but also return null if the defining
+ statement isn't an assignment. */
+
+static gassign *
+maybe_get_assign (tree expr)
+{
+ return safe_dyn_cast <gassign *> (maybe_get_stmt (expr));
+}
+
+/* Return true if this pass should look through a cast of expression FROM
+ to type TYPE when analyzing pieces of an address. */
+
+static bool
+look_through_cast_p (tree type, tree from)
+{
+ return (INTEGRAL_TYPE_P (TREE_TYPE (from)) == INTEGRAL_TYPE_P (type)
+ && POINTER_TYPE_P (TREE_TYPE (from)) == POINTER_TYPE_P (type));
+}
+
+/* Strip all conversions of integers or pointers from EXPR, regardless
+ of whether the conversions are nops. This is useful in the context
+ of this pass because we're not trying to fold or simulate the
+ expression; we just want to see how it's structured. */
+
+static tree
+strip_casts (tree expr)
+{
+ const unsigned int MAX_NITERS = 4;
+
+ tree type = TREE_TYPE (expr);
+ while (CONVERT_EXPR_P (expr)
+ && look_through_cast_p (type, TREE_OPERAND (expr, 0)))
+ expr = TREE_OPERAND (expr, 0);
+
+ for (unsigned int niters = 0; niters < MAX_NITERS; ++niters)
+ {
+ gassign *assign = maybe_get_assign (expr);
+ if (assign
+ && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (assign))
+ && look_through_cast_p (type, gimple_assign_rhs1 (assign)))
+ expr = gimple_assign_rhs1 (assign);
+ else
+ break;
+ }
+ return expr;
+}
+
+/* Compare two address_term_infos in the same address_info. */
+
+static int
+compare_address_terms (const void *a_uncast, const void *b_uncast)
+{
+ const address_term_info *a = (const address_term_info *) a_uncast;
+ const address_term_info *b = (const address_term_info *) b_uncast;
+
+ if (a->expr != b->expr)
+ return SSA_NAME_VERSION (a->expr) < SSA_NAME_VERSION (b->expr) ? -1 : 1;
+
+ if (a->multiplier != b->multiplier)
+ return a->multiplier < b->multiplier ? -1 : 1;
+
+ return 0;
+}
+
+/* Dump ADDRESS using flags FLAGS. */
+
+static void
+dump_address_info (dump_flags_t flags, address_info &address)
+{
+ if (address.base)
+ dump_printf (flags, "%T + ", address.base);
+ for (unsigned int i = 0; i < address.terms.length (); ++i)
+ {
+ if (i != 0)
+ dump_printf (flags, " + ");
+ dump_printf (flags, "%T", address.terms[i].expr);
+ if (address.terms[i].multiplier != 1)
+ dump_printf (flags, " * %wd", address.terms[i].multiplier);
+ }
+ dump_printf (flags, " + [%wd, %wd]",
+ address.min_offset, address.max_offset - 1);
+}
+
+/* Hash an address_info based on its base and terms. */
+
+hashval_t
+address_info_hasher::hash (const address_info *info)
+{
+ inchash::hash hash;
+ hash.add_int (info->base ? TREE_CODE (info->base) : 0);
+ hash.add_int (info->terms.length ());
+ for (unsigned int i = 0; i < info->terms.length (); ++i)
+ {
+ hash.add_int (SSA_NAME_VERSION (info->terms[i].expr));
+ hash.add_hwi (info->terms[i].multiplier);
+ }
+ return hash.end ();
+}
+
+/* Return true if two address_infos have equal bases and terms. Other
+ properties might be different (such as the statement or constant
+ offset range). */
+
+bool
+address_info_hasher::equal (const address_info *a, const address_info *b)
+{
+ if (a->base != b->base
+ && (!a->base || !b->base || !operand_equal_p (a->base, b->base, 0)))
+ return false;
+
+ if (a->terms.length () != b->terms.length ())
+ return false;
+
+ for (unsigned int i = 0; i < a->terms.length (); ++i)
+ if (a->terms[i].expr != b->terms[i].expr
+ || a->terms[i].multiplier != b->terms[i].multiplier)
+ return false;
+
+ return true;
+}
+
+/* Return true if we want to version the loop, i.e. if we have a
+ specific reason for doing so and no specific reason not to. */
+
+bool
+loop_info::worth_versioning_p () const
+{
+ return (!rejected_p
+ && (!bitmap_empty_p (&unity_names) || subloops_benefit_p));
+}
+
+loop_versioning::lv_dom_walker::lv_dom_walker (loop_versioning &lv)
+ : dom_walker (CDI_DOMINATORS), m_lv (lv), m_range_analyzer (false)
+{
+}
+
+/* Process BB before processing the blocks it dominates. */
+
+edge
+loop_versioning::lv_dom_walker::before_dom_children (basic_block bb)
+{
+ m_range_analyzer.enter (bb);
+
+ if (bb == bb->loop_father->header)
+ m_lv.prune_loop_conditions (bb->loop_father,
+ m_range_analyzer.get_vr_values ());
+
+ for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si);
+ gsi_next (&si))
+ m_range_analyzer.record_ranges_from_stmt (gsi_stmt (si), false);
+
+ return NULL;
+}
+
+/* Process BB after processing the blocks it dominates. */
+
+void
+loop_versioning::lv_dom_walker::after_dom_children (basic_block bb)
+{
+ m_range_analyzer.leave (bb);
+}
+
+/* Decide whether to replace VAL with a new value in a versioned loop.
+ Return the new value if so, otherwise return null. */
+
+tree
+loop_versioning::name_prop::get_value (tree val)
+{
+ if (TREE_CODE (val) == SSA_NAME
+ && bitmap_bit_p (&m_li.unity_names, SSA_NAME_VERSION (val)))
+ return build_one_cst (TREE_TYPE (val));
+ return NULL_TREE;
+}
+
+/* Initialize the structure to optimize FN. */
+
+loop_versioning::loop_versioning (function *fn)
+ : m_fn (fn),
+ m_nloops (number_of_loops (fn)),
+ m_num_conditions (0),
+ m_address_table (31)
+{
+ bitmap_obstack_initialize (&m_bitmap_obstack);
+ gcc_obstack_init (&m_obstack);
+
+ /* Initialize the loop information. */
+ m_loops.safe_grow_cleared (m_nloops);
+ for (unsigned int i = 0; i < m_nloops; ++i)
+ {
+ m_loops[i].outermost = get_loop (m_fn, 0);
+ bitmap_initialize (&m_loops[i].unity_names, &m_bitmap_obstack);
+ }
+
+ /* Initialize the list of blocks that belong to each loop. */
+ unsigned int nbbs = last_basic_block_for_fn (fn);
+ m_next_block_in_loop.safe_grow (nbbs);
+ basic_block bb;
+ FOR_EACH_BB_FN (bb, fn)
+ {
+ loop_info &li = get_loop_info (bb->loop_father);
+ m_next_block_in_loop[bb->index] = li.block_list;
+ li.block_list = bb;
+ }
+
+ /* MAX_FIXED_MODE_SIZE should be a reasonable maximum scale for
+ unvectorizable code, since it is the largest size that can be
+ handled efficiently by scalar code. omp_max_vf calculates the
+ maximum number of bytes in a vector, when such a value is relevant
+ to loop optimization. */
+ m_maximum_scale = estimated_poly_value (omp_max_vf ());
+ m_maximum_scale = MAX (m_maximum_scale, MAX_FIXED_MODE_SIZE);
+}
+
+loop_versioning::~loop_versioning ()
+{
+ bitmap_obstack_release (&m_bitmap_obstack);
+ obstack_free (&m_obstack, NULL);
+}
+
+/* Return the maximum number of instructions allowed in LOOP before
+ it becomes too big for versioning.
+
+ There are separate limits for inner and outer loops. The limit for
+ inner loops applies only to loops that benefit directly from versioning.
+ The limit for outer loops applies to all code in the outer loop and
+ its subloops that *doesn't* benefit directly from versioning; such code
+ would be "taken along for the ride". The idea is that if the cost of
+ the latter is small, it is better to version outer loops rather than
+ inner loops, both to reduce the number of repeated checks and to enable
+ more of the loop nest to be optimized as a natural nest (e.g. by loop
+ interchange or outer-loop vectorization). */
+
+unsigned int
+loop_versioning::max_insns_for_loop (struct loop *loop)
+{
+ return (loop->inner
+ ? PARAM_VALUE (PARAM_LOOP_VERSIONING_MAX_OUTER_INSNS)
+ : PARAM_VALUE (PARAM_LOOP_VERSIONING_MAX_INNER_INSNS));
+}
+
+/* Return true if for cost reasons we should avoid versioning any loop
+ that contains STMT.
+
+ Note that we don't need to check whether versioning is invalid for
+ correctness reasons, since the versioning process does that for us.
+ The conditions involved are too rare to be worth duplicating here. */
+
+bool
+loop_versioning::expensive_stmt_p (gimple *stmt)
+{
+ if (gcall *call = dyn_cast <gcall *> (stmt))
+ /* Assume for now that the time spent in an "expensive" call would
+ overwhelm any saving from versioning. */
+ return !gimple_inexpensive_call_p (call);
+ return false;
+}
+
+/* Record that we want to version the loop that contains STMT for the
+ case in which SSA name NAME is equal to 1. We already know that NAME
+ is invariant in the loop. */
+
+void
+loop_versioning::version_for_unity (gimple *stmt, tree name)
+{
+ struct loop *loop = loop_containing_stmt (stmt);
+ loop_info &li = get_loop_info (loop);
+
+ if (bitmap_set_bit (&li.unity_names, SSA_NAME_VERSION (name)))
+ {
+ /* This is the first time we've wanted to version LOOP for NAME.
+ Keep track of the outermost loop that can handle all versioning
+ checks in LI. */
+ struct loop *outermost
+ = outermost_invariant_loop_for_expr (loop, name);
+ if (loop_depth (li.outermost) < loop_depth (outermost))
+ li.outermost = outermost;
+
+ if (dump_enabled_p ())
+ {
+ dump_printf_loc (MSG_NOTE, stmt, "want to version containing loop"
+ " for when %T == 1", name);
+ if (outermost == loop)
+ dump_printf (MSG_NOTE, "; cannot hoist check further");
+ else
+ {
+ dump_printf (MSG_NOTE, "; could implement the check at loop"
+ " depth %d", loop_depth (outermost));
+ if (loop_depth (li.outermost) > loop_depth (outermost))
+ dump_printf (MSG_NOTE, ", but other checks only allow"
+ " a depth of %d", loop_depth (li.outermost));
+ }
+ dump_printf (MSG_NOTE, "\n");
+ }
+
+ m_num_conditions += 1;
+ }
+ else
+ {
+ /* This is a duplicate request. */
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, stmt, "already asked to version containing"
+ " loop for when %T == 1\n", name);
+ }
+}
+
+/* Return true if OP1_TREE is constant and if in principle it is worth
+ versioning an address fragment of the form:
+
+ i * OP1_TREE * OP2 * stride
+
+ for the case in which stride == 1. This in practice means testing
+ whether:
+
+ OP1_TREE * OP2 <= M_MAXIMUM_SCALE.
+
+ If RESULT is nonnull, store OP1_TREE * OP2 there when returning true. */
+
+bool
+loop_versioning::acceptable_multiplier_p (tree op1_tree,
+ unsigned HOST_WIDE_INT op2,
+ unsigned HOST_WIDE_INT *result)
+{
+ if (tree_fits_uhwi_p (op1_tree))
+ {
+ unsigned HOST_WIDE_INT op1 = tree_to_uhwi (op1_tree);
+ /* The first part checks for overflow. */
+ if (op1 * op2 >= op2 && op1 * op2 <= m_maximum_scale)
+ {
+ if (result)
+ *result = op1 * op2;
+ return true;
+ }
+ }
+ return false;
+}
+
+/* Return true if it is worth using loop versioning on a memory access
+ of type TYPE. Store the size of the access in *SIZE if so. */
+
+bool
+loop_versioning::acceptable_type_p (tree type, unsigned HOST_WIDE_INT *size)
+{
+ return (TYPE_SIZE_UNIT (type)
+ && acceptable_multiplier_p (TYPE_SIZE_UNIT (type), 1, size));
+}
+
+/* See whether OP is constant and whether we can multiply TERM by that
+ constant without exceeding M_MAXIMUM_SCALE. Return true and update
+ TERM if so. */
+
+bool
+loop_versioning::multiply_term_by (address_term_info &term, tree op)
+{
+ return acceptable_multiplier_p (op, term.multiplier, &term.multiplier);
+}
+
+/* Decide whether an address fragment of the form STRIDE * MULTIPLIER
+ is likely to be indexing an innermost dimension, returning the result
+ as an INNER_* probability. */
+
+inner_likelihood
+loop_versioning::get_inner_likelihood (tree stride,
+ unsigned HOST_WIDE_INT multiplier)
+{
+ const unsigned int MAX_NITERS = 8;
+
+ /* Iterate over possible values of STRIDE. Return INNER_LIKELY if at
+ least one of those values is likely to be for the innermost dimension.
+ Record in UNLIKELY_P if at least one of those values is unlikely to be
+ for the innermost dimension.
+
+ E.g. for:
+
+ stride = cond ? a * b : 1
+
+ we should treat STRIDE as being a likely inner dimension, since
+ we know that it is 1 under at least some circumstances. (See the
+ Fortran example below.) However:
+
+ stride = a * b
+
+ on its own is unlikely to be for the innermost dimension, since
+ that would require both a and b to be 1 at runtime. */
+ bool unlikely_p = false;
+ tree worklist[MAX_NITERS];
+ unsigned int length = 0;
+ worklist[length++] = stride;
+ for (unsigned int i = 0; i < length; ++i)
+ {
+ tree expr = worklist[i];
+
+ if (CONSTANT_CLASS_P (expr))
+ {
+ /* See if EXPR * MULTIPLIER would be consistent with an individual
+ access or a small grouped access. */
+ if (acceptable_multiplier_p (expr, multiplier))
+ return INNER_LIKELY;
+ else
+ unlikely_p = true;
+ }
+ else if (gimple *stmt = maybe_get_stmt (expr))
+ {
+ /* If EXPR is set by a PHI node, queue its arguments in case
+ we find one that is consistent with an inner dimension.
+
+ An important instance of this is the Fortran handling of array
+ descriptors, which calculates the stride of the inner dimension
+ using a PHI equivalent of:
+
+ raw_stride = a.dim[0].stride;
+ stride = raw_stride != 0 ? raw_stride : 1;
+
+ (Strides for outer dimensions do not treat 0 specially.) */
+ if (gphi *phi = dyn_cast <gphi *> (stmt))
+ {
+ unsigned int nargs = gimple_phi_num_args (phi);
+ for (unsigned int j = 0; j < nargs && length < MAX_NITERS; ++j)
+ worklist[length++] = strip_casts (gimple_phi_arg_def (phi, j));
+ }
+ /* If the value is set by an assignment, expect it to be read
+ from memory (such as an array descriptor) rather than be
+ calculated. */
+ else if (gassign *assign = dyn_cast <gassign *> (stmt))
+ {
+ if (!gimple_assign_load_p (assign))
+ unlikely_p = true;
+ }
+ /* Things like calls don't really tell us anything. */
+ }
+ }
+
+ /* We didn't find any possible values of STRIDE that were likely to be
+ for the innermost dimension. If we found one that was actively
+ unlikely to be for the innermost dimension, assume that that applies
+ to STRIDE too. */
+ return unlikely_p ? INNER_UNLIKELY : INNER_DONT_KNOW;
+}
+
+/* The caller has identified that STRIDE is the stride of interest
+ in TERM, and that the stride is applied in OP_LOOP. Record this
+ information in TERM, deciding whether STRIDE is likely to be for
+ the innermost dimension of an array and whether it represents a
+ versioning opportunity. ADDRESS is the address that contains TERM. */
+
+void
+loop_versioning::analyze_stride (address_info &address,
+ address_term_info &term,
+ tree stride, struct loop *op_loop)
+{
+ term.stride = stride;
+
+ term.inner_likelihood = get_inner_likelihood (stride, term.multiplier);
+ if (dump_enabled_p ())
+ {
+ if (term.inner_likelihood == INNER_LIKELY)
+ dump_printf_loc (MSG_NOTE, address.stmt, "%T is likely to be the"
+ " innermost dimension\n", stride);
+ else if (term.inner_likelihood == INNER_UNLIKELY)
+ dump_printf_loc (MSG_NOTE, address.stmt, "%T is probably not the"
+ " innermost dimension\n", stride);
+ else
+ dump_printf_loc (MSG_NOTE, address.stmt, "cannot tell whether %T"
+ " is the innermost dimension\n", stride);
+ }
+
+ /* To be a versioning opportunity we require:
+
+ - The multiplier applied by TERM is equal to the access size,
+ so that when STRIDE is 1, the accesses in successive loop
+ iterations are consecutive.
+
+ This is deliberately conservative. We could relax it to handle
+ other cases (such as those with gaps between iterations) if we
+ find any real testcases for which it's useful.
+
+ - the stride is applied in the same loop as STMT rather than
+ in an outer loop. Although versioning for strides applied in
+ outer loops could help in some cases -- such as enabling
+ more loop interchange -- the savings are much lower than for
+ inner loops.
+
+ - the stride is an SSA name that is invariant in STMT's loop,
+ since otherwise versioning isn't possible. */
+ unsigned HOST_WIDE_INT access_size = address.max_offset - address.min_offset;
+ if (term.multiplier == access_size
+ && address.loop == op_loop
+ && TREE_CODE (stride) == SSA_NAME
+ && expr_invariant_in_loop_p (address.loop, stride))
+ {
+ term.versioning_opportunity_p = true;
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, address.stmt, "%T == 1 is a versioning"
+ " opportunity\n", stride);
+ }
+}
+
+/* See whether address term TERM (which belongs to ADDRESS) is the result
+ of multiplying a varying SSA name by a loop-invariant SSA name.
+ Return true and update TERM if so.
+
+ This handles both cases that SCEV might handle, such as:
+
+ for (int i = 0; i < n; ++i)
+ res += a[i * stride];
+
+ and ones in which the term varies arbitrarily between iterations, such as:
+
+ for (int i = 0; i < n; ++i)
+ res += a[index[i] * stride]; */
+
+bool
+loop_versioning::find_per_loop_multiplication (address_info &address,
+ address_term_info &term)
+{
+ gimple *mult = maybe_get_assign (term.expr);
+ if (!mult || gimple_assign_rhs_code (mult) != MULT_EXPR)
+ return false;
+
+ struct loop *mult_loop = loop_containing_stmt (mult);
+ if (!loop_outer (mult_loop))
+ return false;
+
+ tree op1 = strip_casts (gimple_assign_rhs1 (mult));
+ tree op2 = strip_casts (gimple_assign_rhs2 (mult));
+ if (TREE_CODE (op1) != SSA_NAME || TREE_CODE (op2) != SSA_NAME)
+ return false;
+
+ bool invariant1_p = expr_invariant_in_loop_p (mult_loop, op1);
+ bool invariant2_p = expr_invariant_in_loop_p (mult_loop, op2);
+ if (invariant1_p == invariant2_p)
+ return false;
+
+ /* Make sure that the loop invariant is OP2 rather than OP1. */
+ if (invariant1_p)
+ std::swap (op1, op2);
+
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, address.stmt, "address term %T = varying %T"
+ " * loop-invariant %T\n", term.expr, op1, op2);
+ analyze_stride (address, term, op2, mult_loop);
+ return true;
+}
+
+/* Try to use scalar evolutions to find an address stride for TERM,
+ which belongs to ADDRESS.
+
+ Here we are interested in any evolution information we can find,
+ not just evolutions wrt ADDRESS->LOOP. For example, if we find that
+ an outer loop obviously iterates over the inner dimension of an array,
+ that information can help us eliminate worthless versioning opportunities
+ in inner loops. */
+
+void
+loop_versioning::analyze_term_using_scevs (address_info &address,
+ address_term_info &term)
+{
+ gimple *setter = maybe_get_stmt (term.expr);
+ if (!setter)
+ return;
+
+ struct loop *wrt_loop = loop_containing_stmt (setter);
+ if (!loop_outer (wrt_loop))
+ return;
+
+ tree chrec = strip_casts (analyze_scalar_evolution (wrt_loop, term.expr));
+ if (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, address.stmt,
+ "address term %T = %T\n", term.expr, chrec);
+
+ /* Peel casts and accumulate constant multiplications, up to the
+ limit allowed by M_MAXIMUM_SCALE. */
+ tree stride = strip_casts (CHREC_RIGHT (chrec));
+ while (TREE_CODE (stride) == MULT_EXPR
+ && multiply_term_by (term, TREE_OPERAND (stride, 1)))
+ stride = strip_casts (TREE_OPERAND (stride, 0));
+
+ gassign *assign;
+ while ((assign = maybe_get_assign (stride))
+ && gimple_assign_rhs_code (assign) == MULT_EXPR
+ && multiply_term_by (term, gimple_assign_rhs2 (assign)))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, address.stmt,
+ "looking through %G", assign);
+ stride = strip_casts (gimple_assign_rhs1 (assign));
+ }
+
+ analyze_stride (address, term, stride, get_chrec_loop (chrec));
+ }
+}
+
+/* Try to identify loop strides in ADDRESS and try to choose realistic
+ versioning opportunities based on these strides.
+
+ The main difficulty here isn't finding strides that could be used
+ in a version check (that's pretty easy). The problem instead is to
+ avoid versioning for some stride S that is unlikely ever to be 1 at
+ runtime. Versioning for S == 1 on its own would lead to unnecessary
+ code bloat, while adding S == 1 to more realistic version conditions
+ would lose the optimisation opportunity offered by those other conditions.
+
+ For example, versioning for a stride of 1 in the Fortran code:
+
+ integer :: a(:,:)
+ a(1,:) = 1
+
+ is not usually a good idea, since the assignment is iterating over
+ an outer dimension and is relatively unlikely to have a stride of 1.
+ (It isn't impossible, since the inner dimension might be 1, or the
+ array might be transposed.) Similarly, in:
+
+ integer :: a(:,:), b(:,:)
+ b(:,1) = a(1,:)
+
+ b(:,1) is relatively likely to have a stride of 1 while a(1,:) isn't.
+ Versioning for when both strides are 1 would lose most of the benefit
+ of versioning for b's access.
+
+ The approach we take is as follows:
+
+ - Analyze each term to see whether it has an identifiable stride,
+ regardless of which loop applies the stride.
+
+ - Evaluate the likelihood that each such stride is for the innermost
+ dimension of an array, on the scale "likely", "don't know" or "unlikely".
+
+ - If there is a single "likely" innermost stride, and that stride is
+ applied in the loop that contains STMT, version the loop for when the
+ stride is 1. This deals with the cases in which we're fairly
+ confident of doing the right thing, such as the b(:,1) reference above.
+
+ - If there are no "likely" innermost strides, and the loop that contains
+ STMT uses a stride that we rated as "don't know", version for when
+ that stride is 1. This is principally used for C code such as:
+
+ for (int i = 0; i < n; ++i)
+ a[i * x] = ...;
+
+ and:
+
+ for (int j = 0; j < n; ++j)
+ for (int i = 0; i < n; ++i)
+ a[i * x + j * y] = ...;
+
+ where nothing in the way "x" and "y" are set gives a hint as to
+ whether "i" iterates over the innermost dimension of the array.
+ In these situations it seems reasonable to assume the the
+ programmer has nested the loops appropriately (although of course
+ there are examples like GEMM in which this assumption doesn't hold
+ for all accesses in the loop).
+
+ This case is also useful for the Fortran equivalent of the
+ above C code. */
+
+void
+loop_versioning::analyze_address_fragment (address_info &address)
+{
+ if (dump_enabled_p ())
+ {
+ dump_printf_loc (MSG_NOTE, address.stmt, "analyzing address fragment ");
+ dump_address_info (MSG_NOTE, address);
+ dump_printf (MSG_NOTE, "\n");
+ }
+
+ /* Analyze each component of the sum to see whether it involves an
+ apparent stride.
+
+ There is an overlap between the addresses that
+ find_per_loop_multiplication and analyze_term_using_scevs can handle,
+ but the former is much cheaper than SCEV analysis, so try it first. */
+ for (unsigned int i = 0; i < address.terms.length (); ++i)
+ if (!find_per_loop_multiplication (address, address.terms[i]))
+ analyze_term_using_scevs (address, address.terms[i]);
+
+ /* Check for strides that are likely to be for the innermost dimension.
+
+ 1. If there is a single likely inner stride, if it is an SSA name,
+ and if it is worth versioning the loop for when the SSA name
+ equals 1, record that we want to do so.
+
+ 2. Otherwise, if there any likely inner strides, bail out. This means
+ one of:
+
+ (a) There are multiple likely inner strides. This suggests we're
+ confused and be can't be confident of doing the right thing.
+
+ (b) There is a single likely inner stride and it is a constant
+ rather than an SSA name. This can mean either that the access
+ is a natural one without any variable strides, such as:
+
+ for (int i = 0; i < n; ++i)
+ a[i] += 1;
+
+ or that a variable stride is applied to an outer dimension,
+ such as:
+
+ for (int i = 0; i < n; ++i)
+ for (int j = 0; j < n; ++j)
+ a[j * stride][i] += 1;
+
+ (c) There is a single likely inner stride, and it is an SSA name,
+ but it isn't a worthwhile versioning opportunity. This usually
+ means that the variable stride is applied by an outer loop,
+ such as:
+
+ for (int i = 0; i < n; ++i)
+ for (int j = 0; j < n; ++j)
+ a[j][i * stride] += 1;
+
+ or (using an example with a more natural loop nesting):
+
+ for (int i = 0; i < n; ++i)
+ for (int j = 0; j < n; ++j)
+ a[i][j] += b[i * stride];
+
+ in cases where b[i * stride] cannot (yet) be hoisted for
+ aliasing reasons.
+
+ 3. If there are no likely inner strides, fall through to the next
+ set of checks.
+
+ Pointer equality is enough to check for uniqueness in (1), since we
+ only care about SSA names. */
+ tree chosen_stride = NULL_TREE;
+ tree version_stride = NULL_TREE;
+ for (unsigned int i = 0; i < address.terms.length (); ++i)
+ if (chosen_stride != address.terms[i].stride
+ && address.terms[i].inner_likelihood == INNER_LIKELY)
+ {
+ if (chosen_stride)
+ return;
+ chosen_stride = address.terms[i].stride;
+ if (address.terms[i].versioning_opportunity_p)
+ version_stride = chosen_stride;
+ }
+
+ /* If there are no likely inner strides, see if there is a single
+ versioning opportunity for a stride that was rated as INNER_DONT_KNOW.
+ See the comment above the function for the cases that this code
+ handles. */
+ if (!chosen_stride)
+ for (unsigned int i = 0; i < address.terms.length (); ++i)
+ if (version_stride != address.terms[i].stride
+ && address.terms[i].inner_likelihood == INNER_DONT_KNOW
+ && address.terms[i].versioning_opportunity_p)
+ {
+ if (version_stride)
+ return;
+ version_stride = address.terms[i].stride;
+ }
+
+ if (version_stride)
+ version_for_unity (address.stmt, version_stride);
+}
+
+/* Treat EXPR * MULTIPLIER + OFFSET as a fragment of an address that addresses
+ TYPE_SIZE bytes and record this address fragment for later processing.
+ STMT is the statement that contains the address. */
+
+void
+loop_versioning::record_address_fragment (gimple *stmt,
+ unsigned HOST_WIDE_INT type_size,
+ tree expr,
+ unsigned HOST_WIDE_INT multiplier,
+ HOST_WIDE_INT offset)
+{
+ /* We're only interested in computed values. */
+ if (TREE_CODE (expr) != SSA_NAME)
+ return;
+
+ /* Quick exit if no part of the address is calculated in STMT's loop,
+ since such addresses have no versioning opportunities. */
+ struct loop *loop = loop_containing_stmt (stmt);
+ if (expr_invariant_in_loop_p (loop, expr))
+ return;
+
+ /* Set up an address_info for EXPR * MULTIPLIER. */
+ address_info *address = XOBNEW (&m_obstack, address_info);
+ new (address) address_info;
+ address->stmt = stmt;
+ address->loop = loop;
+ address->base = NULL_TREE;
+ address->terms.quick_grow (1);
+ address->terms[0].expr = expr;
+ address->terms[0].multiplier = multiplier;
+ address->terms[0].stride = NULL_TREE;
+ address->terms[0].inner_likelihood = INNER_UNLIKELY;
+ address->terms[0].versioning_opportunity_p = false;
+ address->min_offset = offset;
+
+ /* Peel apart the expression into a sum of address_terms, where each
+ term is multiplied by a constant. Treat a + b and a - b the same,
+ since it doesn't matter for our purposes whether an address is
+ increasing or decreasing. Distribute (a + b) * constant into
+ a * constant + b * constant.
+
+ We don't care which loop each term belongs to, since we want to
+ examine as many candidate strides as possible when determining
+ which is likely to be for the innermost dimension. We therefore
+ don't limit the search to statements in STMT's loop. */
+ for (unsigned int i = 0; i < address->terms.length (); )
+ {
+ if (gassign *assign = maybe_get_assign (address->terms[i].expr))
+ {
+ tree_code code = gimple_assign_rhs_code (assign);
+ if (code == PLUS_EXPR
+ || code == POINTER_PLUS_EXPR
+ || code == MINUS_EXPR)
+ {
+ tree op1 = gimple_assign_rhs1 (assign);
+ tree op2 = gimple_assign_rhs2 (assign);
+ if (TREE_CODE (op2) == INTEGER_CST)
+ {
+ address->terms[i].expr = strip_casts (op1);
+ /* This is heuristic only, so don't worry about truncation
+ or overflow. */
+ address->min_offset += (TREE_INT_CST_LOW (op2)
+ * address->terms[i].multiplier);
+ continue;
+ }
+ else if (address->terms.length () < address_info::MAX_TERMS)
+ {
+ unsigned int j = address->terms.length ();
+ address->terms.quick_push (address->terms[i]);
+ address->terms[i].expr = strip_casts (op1);
+ address->terms[j].expr = strip_casts (op2);
+ continue;
+ }
+ }
+ if (code == MULT_EXPR)
+ {
+ tree op1 = gimple_assign_rhs1 (assign);
+ tree op2 = gimple_assign_rhs2 (assign);
+ if (multiply_term_by (address->terms[i], op2))
+ {
+ address->terms[i].expr = strip_casts (op1);
+ continue;
+ }
+ }
+ }
+ i += 1;
+ }
+
+ /* Peel off any symbolic pointer. */
+ if (TREE_CODE (address->terms[0].expr) != SSA_NAME
+ && address->terms[0].multiplier == 1)
+ {
+ if (address->terms.length () == 1)
+ {
+ obstack_free (&m_obstack, address);
+ return;
+ }
+ address->base = address->terms[0].expr;
+ address->terms.ordered_remove (0);
+ }
+
+ /* Require all remaining terms to be SSA names. (This could be false
+ for unfolded statements, but they aren't worth dealing with.) */
+ for (unsigned int i = 0; i < address->terms.length (); ++i)
+ if (TREE_CODE (address->terms[i].expr) != SSA_NAME)
+ {
+ obstack_free (&m_obstack, address);
+ return;
+ }
+
+ /* The loop above set MIN_OFFSET based on the first byte of the
+ referenced data. Calculate the end + 1. */
+ address->max_offset = address->min_offset + type_size;
+
+ /* Put the terms into a canonical order for the hash table lookup below. */
+ address->terms.qsort (compare_address_terms);
+
+ if (dump_enabled_p ())
+ {
+ dump_printf_loc (MSG_NOTE, stmt, "recording address fragment %T", expr);
+ if (multiplier != 1)
+ dump_printf (MSG_NOTE, " * %wd", multiplier);
+ dump_printf (MSG_NOTE, " = ");
+ dump_address_info (MSG_NOTE, *address);
+ dump_printf (MSG_NOTE, "\n");
+ }
+
+ /* Pool address information with the same terms (but potentially
+ different offsets). */
+ address_info **slot = m_address_table.find_slot (address, INSERT);
+ if (address_info *old_address = *slot)
+ {
+ /* We've already seen an address with the same terms. Extend the
+ offset range to account for this access. Doing this can paper
+ over gaps, such as in:
+
+ a[i * stride * 4] + a[i * stride * 4 + 3];
+
+ where nothing references "+ 1" or "+ 2". However, the vectorizer
+ handles such gapped accesses without problems, so it's not worth
+ trying to exclude them. */
+ if (old_address->min_offset > address->min_offset)
+ old_address->min_offset = address->min_offset;
+ if (old_address->max_offset < address->max_offset)
+ old_address->max_offset = address->max_offset;
+ obstack_free (&m_obstack, address);
+ }
+ else
+ {
+ /* This is the first time we've seen an address with these terms. */
+ *slot = address;
+ m_address_list.safe_push (address);
+ }
+}
+
+/* Analyze expression EXPR, which occurs in STMT. */
+
+void
+loop_versioning::analyze_expr (gimple *stmt, tree expr)
+{
+ unsigned HOST_WIDE_INT type_size;
+
+ while (handled_component_p (expr))
+ {
+ /* See whether we can use versioning to avoid a multiplication
+ in an array index. */
+ if (TREE_CODE (expr) == ARRAY_REF
+ && acceptable_type_p (TREE_TYPE (expr), &type_size))
+ record_address_fragment (stmt, type_size,
+ TREE_OPERAND (expr, 1), type_size, 0);
+ expr = TREE_OPERAND (expr, 0);
+ }
+
+ /* See whether we can use versioning to avoid a multiplication
+ in the pointer calculation of a MEM_REF. */
+ if (TREE_CODE (expr) == MEM_REF
+ && acceptable_type_p (TREE_TYPE (expr), &type_size))
+ record_address_fragment (stmt, type_size, TREE_OPERAND (expr, 0), 1,
+ /* This is heuristic only, so don't worry
+ about truncation or overflow. */
+ TREE_INT_CST_LOW (TREE_OPERAND (expr, 1)));
+
+ /* These would be easy to handle if they existed at this stage. */
+ gcc_checking_assert (TREE_CODE (expr) != TARGET_MEM_REF);
+}
+
+/* Analyze all the statements in BB looking for useful version checks.
+ Return true on success, false if something prevents the block from
+ being versioned. */
+
+bool
+loop_versioning::analyze_block (basic_block bb)
+{
+ struct loop *loop = bb->loop_father;
+ loop_info &li = get_loop_info (loop);
+ for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
+ gsi_next (&gsi))
+ {
+ gimple *stmt = gsi_stmt (gsi);
+ if (is_gimple_debug (stmt))
+ continue;
+
+ if (expensive_stmt_p (stmt))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, stmt, "expensive statement"
+ " prevents versioning: %G", stmt);
+ return false;
+ }
+
+ /* Only look for direct versioning opportunities in inner loops
+ since the benefit tends to be much smaller for outer loops. */
+ if (!loop->inner)
+ {
+ unsigned int nops = gimple_num_ops (stmt);
+ for (unsigned int i = 0; i < nops; ++i)
+ if (tree op = gimple_op (stmt, i))
+ analyze_expr (stmt, op);
+ }
+
+ /* The point of the instruction limit is to prevent excessive
+ code growth, so this is a size-based estimate even though
+ the optimization is aimed at speed. */
+ li.num_insns += estimate_num_insns (stmt, &eni_size_weights);
+ }
+
+ return true;
+}
+
+/* Analyze all the blocks in the function, looking for useful version checks.
+ Return true if we found one. */
+
+bool
+loop_versioning::analyze_blocks ()
+{
+ AUTO_DUMP_SCOPE ("analyze_blocks",
+ dump_user_location_t::from_function_decl (m_fn->decl));
+
+ /* For now we don't try to version the whole function, although
+ versioning at that level could be useful in some cases. */
+ get_loop_info (get_loop (m_fn, 0)).rejected_p = true;
+
+ struct loop *loop;
+ FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
+ {
+ loop_info &linfo = get_loop_info (loop);
+
+ /* Ignore cold loops. */
+ if (!optimize_loop_for_speed_p (loop))
+ linfo.rejected_p = true;
+
+ /* See whether an inner loop prevents versioning of this loop. */
+ if (!linfo.rejected_p)
+ for (struct loop *inner = loop->inner; inner; inner = inner->next)
+ if (get_loop_info (inner).rejected_p)
+ {
+ linfo.rejected_p = true;
+ break;
+ }
+
+ /* If versioning the loop is still a possibility, examine the
+ statements in the loop to look for versioning opportunities. */
+ if (!linfo.rejected_p)
+ {
+ void *start_point = obstack_alloc (&m_obstack, 0);
+
+ for (basic_block bb = linfo.block_list; bb;
+ bb = m_next_block_in_loop[bb->index])
+ if (!analyze_block (bb))
+ {
+ linfo.rejected_p = true;
+ break;
+ }
+
+ if (!linfo.rejected_p)
+ {
+ /* Process any queued address fragments, now that we have
+ complete grouping information. */
+ address_info *address;
+ unsigned int i;
+ FOR_EACH_VEC_ELT (m_address_list, i, address)
+ analyze_address_fragment (*address);
+ }
+
+ m_address_table.empty ();
+ m_address_list.truncate (0);
+ obstack_free (&m_obstack, start_point);
+ }
+ }
+
+ return m_num_conditions != 0;
+}
+
+/* Use the ranges in VRS to remove impossible versioning conditions from
+ LOOP. */
+
+void
+loop_versioning::prune_loop_conditions (struct loop *loop, vr_values *vrs)
+{
+ loop_info &li = get_loop_info (loop);
+
+ int to_remove = -1;
+ bitmap_iterator bi;
+ unsigned int i;
+ EXECUTE_IF_SET_IN_BITMAP (&li.unity_names, 0, i, bi)
+ {
+ tree name = ssa_name (i);
+ value_range *vr = vrs->get_value_range (name);
+ if (vr && !range_includes_p (vr, 1))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, find_loop_location (loop),
+ "%T can never be 1 in this loop\n", name);
+
+ if (to_remove >= 0)
+ bitmap_clear_bit (&li.unity_names, to_remove);
+ to_remove = i;
+ m_num_conditions -= 1;
+ }
+ }
+ if (to_remove >= 0)
+ bitmap_clear_bit (&li.unity_names, to_remove);
+}
+
+/* Remove any scheduled loop version conditions that will never be true.
+ Return true if any remain. */
+
+bool
+loop_versioning::prune_conditions ()
+{
+ AUTO_DUMP_SCOPE ("prune_loop_conditions",
+ dump_user_location_t::from_function_decl (m_fn->decl));
+
+ calculate_dominance_info (CDI_DOMINATORS);
+ lv_dom_walker dom_walker (*this);
+ dom_walker.walk (ENTRY_BLOCK_PTR_FOR_FN (m_fn));
+ return m_num_conditions != 0;
+}
+
+/* Merge the version checks for INNER into immediately-enclosing loop
+ OUTER. */
+
+void
+loop_versioning::merge_loop_info (struct loop *outer, struct loop *inner)
+{
+ loop_info &inner_li = get_loop_info (inner);
+ loop_info &outer_li = get_loop_info (outer);
+
+ if (dump_enabled_p ())
+ {
+ bitmap_iterator bi;
+ unsigned int i;
+ EXECUTE_IF_SET_IN_BITMAP (&inner_li.unity_names, 0, i, bi)
+ if (!bitmap_bit_p (&outer_li.unity_names, i))
+ dump_printf_loc (MSG_NOTE, find_loop_location (inner),
+ "hoisting check that %T == 1 to outer loop\n",
+ ssa_name (i));
+ }
+
+ bitmap_ior_into (&outer_li.unity_names, &inner_li.unity_names);
+ if (loop_depth (outer_li.outermost) < loop_depth (inner_li.outermost))
+ outer_li.outermost = inner_li.outermost;
+}
+
+/* Add LOOP to the queue of loops to version. */
+
+void
+loop_versioning::add_loop_to_queue (struct loop *loop)
+{
+ loop_info &li = get_loop_info (loop);
+
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, find_loop_location (loop),
+ "queuing this loop for versioning\n");
+ m_loops_to_version.safe_push (loop);
+
+ /* Don't try to version superloops. */
+ li.rejected_p = true;
+}
+
+/* Decide whether the cost model would allow us to version LOOP,
+ either directly or as part of a parent loop, and return true if so.
+ This does not imply that the loop is actually worth versioning in its
+ own right, just that it would be valid to version it if something
+ benefited.
+
+ We have already made this decision for all inner loops of LOOP. */
+
+bool
+loop_versioning::decide_whether_loop_is_versionable (struct loop *loop)
+{
+ loop_info &li = get_loop_info (loop);
+
+ if (li.rejected_p)
+ return false;
+
+ /* Examine the decisions made for inner loops. */
+ for (struct loop *inner = loop->inner; inner; inner = inner->next)
+ {
+ loop_info &inner_li = get_loop_info (inner);
+ if (inner_li.rejected_p)
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, find_loop_location (loop),
+ "not versioning this loop because one of its"
+ " inner loops should not be versioned\n");
+ return false;
+ }
+
+ if (inner_li.worth_versioning_p ())
+ li.subloops_benefit_p = true;
+
+ /* Accumulate the number of instructions from subloops that are not
+ the innermost, or that don't benefit from versioning. Only the
+ instructions from innermost loops that benefit from versioning
+ should be weighed against loop-versioning-max-inner-insns;
+ everything else should be weighed against
+ loop-versioning-max-outer-insns. */
+ if (!inner_li.worth_versioning_p () || inner->inner)
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, find_loop_location (loop),
+ "counting %d instructions from this loop"
+ " against its parent loop\n", inner_li.num_insns);
+ li.num_insns += inner_li.num_insns;
+ }
+ }
+
+ /* Enforce the size limits. */
+ if (li.worth_versioning_p ())
+ {
+ unsigned int max_num_insns = max_insns_for_loop (loop);
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, find_loop_location (loop),
+ "this loop has %d instructions, against"
+ " a versioning limit of %d\n",
+ li.num_insns, max_num_insns);
+ if (li.num_insns > max_num_insns)
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION
+ | MSG_PRIORITY_USER_FACING,
+ find_loop_location (loop),
+ "this loop is too big to version");
+ return false;
+ }
+ }
+
+ /* Hoist all version checks from subloops to this loop. */
+ for (struct loop *subloop = loop->inner; subloop; subloop = subloop->next)
+ merge_loop_info (loop, subloop);
+
+ return true;
+}
+
+/* Decide which loops to version and add them to the versioning queue.
+ Return true if there are any loops to version. */
+
+bool
+loop_versioning::make_versioning_decisions ()
+{
+ AUTO_DUMP_SCOPE ("make_versioning_decisions",
+ dump_user_location_t::from_function_decl (m_fn->decl));
+
+ struct loop *loop;
+ FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
+ {
+ loop_info &linfo = get_loop_info (loop);
+ if (decide_whether_loop_is_versionable (loop))
+ {
+ /* Commit to versioning LOOP directly if we can't hoist the
+ version checks any further. */
+ if (linfo.worth_versioning_p ()
+ && (loop_depth (loop) == 1 || linfo.outermost == loop))
+ add_loop_to_queue (loop);
+ }
+ else
+ {
+ /* We can't version this loop, so individually version any
+ subloops that would benefit and haven't been versioned yet. */
+ linfo.rejected_p = true;
+ for (struct loop *subloop = loop->inner; subloop;
+ subloop = subloop->next)
+ if (get_loop_info (subloop).worth_versioning_p ())
+ add_loop_to_queue (subloop);
+ }
+ }
+
+ return !m_loops_to_version.is_empty ();
+}
+
+/* Attempt to implement loop versioning for LOOP, using the information
+ cached in the associated loop_info. Return true on success. */
+
+bool
+loop_versioning::version_loop (struct loop *loop)
+{
+ loop_info &li = get_loop_info (loop);
+
+ /* Build up a condition that selects the original loop instead of
+ the simplified loop. */
+ tree cond = boolean_false_node;
+ bitmap_iterator bi;
+ unsigned int i;
+ EXECUTE_IF_SET_IN_BITMAP (&li.unity_names, 0, i, bi)
+ {
+ tree name = ssa_name (i);
+ tree ne_one = fold_build2 (NE_EXPR, boolean_type_node, name,
+ build_one_cst (TREE_TYPE (name)));
+ cond = fold_build2 (TRUTH_OR_EXPR, boolean_type_node, cond, ne_one);
+ }
+
+ /* Convert the condition into a suitable gcond. */
+ gimple_seq stmts = NULL;
+ cond = force_gimple_operand_1 (cond, &stmts, is_gimple_condexpr, NULL_TREE);
+
+ /* Version the loop. */
+ initialize_original_copy_tables ();
+ basic_block cond_bb;
+ li.optimized_loop = loop_version (loop, cond, &cond_bb,
+ profile_probability::unlikely (),
+ profile_probability::likely (),
+ profile_probability::unlikely (),
+ profile_probability::likely (), true);
+ free_original_copy_tables ();
+ if (!li.optimized_loop)
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, find_loop_location (loop),
+ "tried but failed to version this loop for when"
+ " certain strides are 1\n");
+ return false;
+ }
+
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, find_loop_location (loop),
+ "versioned this loop for when certain strides are 1\n");
+
+ /* Insert the statements that feed COND. */
+ if (stmts)
+ {
+ gimple_stmt_iterator gsi = gsi_last_bb (cond_bb);
+ gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
+ }
+
+ return true;
+}
+
+/* Attempt to version all loops in the versioning queue. */
+
+void
+loop_versioning::implement_versioning_decisions ()
+{
+ /* No AUTO_DUMP_SCOPE here since all messages are top-level and
+ user-facing at this point. */
+
+ bool any_succeeded_p = false;
+ struct loop *loop;
+ unsigned int i;
+ FOR_EACH_VEC_ELT (m_loops_to_version, i, loop)
+ if (version_loop (loop))
+ any_succeeded_p = true;
+ if (!any_succeeded_p)
+ return;
+
+ update_ssa (TODO_update_ssa);
+
+ /* Simplify the new loop, which is used when COND is false. */
+ FOR_EACH_VEC_ELT (m_loops_to_version, i, loop)
+ {
+ loop_info &linfo = get_loop_info (loop);
+ if (linfo.optimized_loop)
+ name_prop (linfo).substitute_and_fold (linfo.optimized_loop->header);
+ }
+}
+
+/* Run the pass and return a set of TODO_* flags. */
+
+unsigned int
+loop_versioning::run ()
+{
+ gcc_assert (scev_initialized_p ());
+
+ if (analyze_blocks ()
+ && prune_conditions ()
+ && make_versioning_decisions ())
+ implement_versioning_decisions ();
+
+ return 0;
+}
+
+/* Loop versioning pass. */
+
+const pass_data pass_data_loop_versioning =
+{
+ GIMPLE_PASS, /* type */
+ "lversion", /* name */
+ OPTGROUP_LOOP, /* optinfo_flags */
+ TV_LOOP_VERSIONING, /* tv_id */
+ PROP_cfg, /* properties_required */
+ 0, /* properties_provided */
+ 0, /* properties_destroyed */
+ 0, /* todo_flags_start */
+ 0, /* todo_flags_finish */
+};
+
+class pass_loop_versioning : public gimple_opt_pass
+{
+public:
+ pass_loop_versioning (gcc::context *ctxt)
+ : gimple_opt_pass (pass_data_loop_versioning, ctxt)
+ {}
+
+ /* opt_pass methods: */
+ virtual bool gate (function *) { return flag_version_loops_for_strides; }
+ virtual unsigned int execute (function *);
+};
+
+unsigned int
+pass_loop_versioning::execute (function *fn)
+{
+ if (number_of_loops (fn) <= 1)
+ return 0;
+
+ return loop_versioning (fn).run ();
+}
+
+} // anon namespace
+
+gimple_opt_pass *
+make_pass_loop_versioning (gcc::context *ctxt)
+{
+ return new pass_loop_versioning (ctxt);
+}
diff --git a/gcc/opts.c b/gcc/opts.c
index c16b5a44855..64e56c25782 100644
--- a/gcc/opts.c
+++ b/gcc/opts.c
@@ -556,6 +556,7 @@ static const struct default_options default_options_table[] =
{ OPT_LEVELS_3_PLUS, OPT_ftree_slp_vectorize, NULL, 1 },
{ OPT_LEVELS_3_PLUS, OPT_funswitch_loops, NULL, 1 },
{ OPT_LEVELS_3_PLUS, OPT_fvect_cost_model_, NULL, VECT_COST_MODEL_DYNAMIC },
+ { OPT_LEVELS_3_PLUS, OPT_fversion_loops_for_strides, NULL, 1 },
/* -Ofast adds optimizations to -O3. */
{ OPT_LEVELS_FAST, OPT_ffast_math, NULL, 1 },
diff --git a/gcc/params.def b/gcc/params.def
index 982f180a312..6f98fccd291 100644
--- a/gcc/params.def
+++ b/gcc/params.def
@@ -1365,6 +1365,19 @@ DEFPARAM(PARAM_LOGICAL_OP_NON_SHORT_CIRCUIT,
"True if a non-short-circuit operation is optimal.",
-1, -1, 1)
+DEFPARAM(PARAM_LOOP_VERSIONING_MAX_INNER_INSNS,
+ "loop-versioning-max-inner-insns",
+ "The maximum number of instructions in an inner loop that is being"
+ " considered for versioning.",
+ 200, 0, 0)
+
+DEFPARAM(PARAM_LOOP_VERSIONING_MAX_OUTER_INSNS,
+ "loop-versioning-max-outer-insns",
+ "The maximum number of instructions in an outer loop that is being"
+ " considered for versioning, on top of the instructions in inner"
+ " loops.",
+ 100, 0, 0)
+
/*
Local variables:
diff --git a/gcc/passes.def b/gcc/passes.def
index 0079fecef32..144df4fa417 100644
--- a/gcc/passes.def
+++ b/gcc/passes.def
@@ -265,6 +265,7 @@ along with GCC; see the file COPYING3. If not see
NEXT_PASS (pass_tree_unswitch);
NEXT_PASS (pass_scev_cprop);
NEXT_PASS (pass_loop_split);
+ NEXT_PASS (pass_loop_versioning);
NEXT_PASS (pass_loop_jam);
/* All unswitching, final value replacement and splitting can expose
empty loops. Remove them now. */
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 42769790930..6fc2f0c9e5a 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,25 @@
+2018-12-17 Richard Sandiford <richard.sandiford@arm.com>
+
+ * gcc.dg/loop-versioning-1.c: New test.
+ * gcc.dg/loop-versioning-10.c: Likewise.
+ * gcc.dg/loop-versioning-11.c: Likewise.
+ * gcc.dg/loop-versioning-2.c: Likewise.
+ * gcc.dg/loop-versioning-3.c: Likewise.
+ * gcc.dg/loop-versioning-4.c: Likewise.
+ * gcc.dg/loop-versioning-5.c: Likewise.
+ * gcc.dg/loop-versioning-6.c: Likewise.
+ * gcc.dg/loop-versioning-7.c: Likewise.
+ * gcc.dg/loop-versioning-8.c: Likewise.
+ * gcc.dg/loop-versioning-9.c: Likewise.
+ * gfortran.dg/loop_versioning_1.f90: Likewise.
+ * gfortran.dg/loop_versioning_2.f90: Likewise.
+ * gfortran.dg/loop_versioning_3.f90: Likewise.
+ * gfortran.dg/loop_versioning_4.f90: Likewise.
+ * gfortran.dg/loop_versioning_5.f90: Likewise.
+ * gfortran.dg/loop_versioning_6.f90: Likewise.
+ * gfortran.dg/loop_versioning_7.f90: Likewise.
+ * gfortran.dg/loop_versioning_8.f90: Likewise.
+
2018-12-16 Steven G. Kargl <kargl@gcc.gnu.org>
PR fortran/88116
diff --git a/gcc/testsuite/gcc.dg/loop-versioning-1.c b/gcc/testsuite/gcc.dg/loop-versioning-1.c
new file mode 100644
index 00000000000..e61ff7a5d10
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/loop-versioning-1.c
@@ -0,0 +1,92 @@
+/* { dg-options "-O3 -fdump-tree-lversion-details" } */
+
+/* The simplest IV case. */
+
+void
+f1 (double *x, int stepx, int n)
+{
+ for (int i = 0; i < n; ++i)
+ x[stepx * i] = 100;
+}
+
+void
+f2 (double *x, int stepx, int limit)
+{
+ for (int i = 0; i < limit; i += stepx)
+ x[i] = 100;
+}
+
+void
+f3 (double *x, int stepx, int limit)
+{
+ for (double *y = x; y < x + limit; y += stepx)
+ *y = 100;
+}
+
+void
+f4 (double *x, int stepx, unsigned int n)
+{
+ for (unsigned int i = 0; i < n; ++i)
+ x[stepx * i] = 100;
+}
+
+void
+f5 (double *x, int stepx, unsigned int limit)
+{
+ for (unsigned int i = 0; i < limit; i += stepx)
+ x[i] = 100;
+}
+
+void
+f6 (double *x, int stepx, unsigned int limit)
+{
+ for (double *y = x; y < x + limit; y += stepx)
+ *y = 100;
+}
+
+double x[10000];
+
+void
+g1 (int stepx, int n)
+{
+ for (int i = 0; i < n; ++i)
+ x[stepx * i] = 100;
+}
+
+void
+g2 (int stepx, int limit)
+{
+ for (int i = 0; i < limit; i += stepx)
+ x[i] = 100;
+}
+
+void
+g3 (int stepx, int limit)
+{
+ for (double *y = x; y < x + limit; y += stepx)
+ *y = 100;
+}
+
+void
+g4 (int stepx, unsigned int n)
+{
+ for (unsigned int i = 0; i < n; ++i)
+ x[stepx * i] = 100;
+}
+
+void
+g5 (int stepx, unsigned int limit)
+{
+ for (unsigned int i = 0; i < limit; i += stepx)
+ x[i] = 100;
+}
+
+void
+g6 (int stepx, unsigned int limit)
+{
+ for (double *y = x; y < x + limit; y += stepx)
+ *y = 100;
+}
+
+/* { dg-final { scan-tree-dump-times {want to version containing loop} 12 "lversion" } } */
+/* { dg-final { scan-tree-dump-times {versioned this loop} 12 "lversion" } } */
diff --git a/gcc/testsuite/gcc.dg/loop-versioning-10.c b/gcc/testsuite/gcc.dg/loop-versioning-10.c
new file mode 100644
index 00000000000..f634448c253
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/loop-versioning-10.c
@@ -0,0 +1,52 @@
+/* { dg-options "-O3 -fdump-tree-lversion-details" } */
+
+/* Test that we can version a gather-like operation in which a variable
+ stride is applied to the index. */
+
+int
+f1 (int *x, int *index, int step, int n)
+{
+ int res = 0;
+ for (int i = 0; i < n; ++i)
+ res += x[index[i] * step];
+ return res;
+}
+
+int
+f2 (int *x, int *index, int step, int n)
+{
+ int res = 0;
+ for (int i = 0; i < n; ++i)
+ {
+ int *ptr = x + index[i] * step;
+ res += *ptr;
+ }
+ return res;
+}
+
+int x[1000];
+
+int
+g1 (int *index, int step, int n)
+{
+ int res = 0;
+ for (int i = 0; i < n; ++i)
+ res += x[index[i] * step];
+ return res;
+}
+
+int
+g2 (int *index, int step, int n)
+{
+ int res = 0;
+ for (int i = 0; i < n; ++i)
+ {
+ int *ptr = x + index[i] * step;
+ res += *ptr;
+ }
+ return res;
+}
+
+/* { dg-final { scan-tree-dump-times {address term [^\n]* \* loop-invariant} 4 "lversion" } } */
+/* { dg-final { scan-tree-dump-times {want to version containing loop} 4 "lversion" } } */
+/* { dg-final { scan-tree-dump-times {versioned this loop} 4 "lversion" } } */
diff --git a/gcc/testsuite/gcc.dg/loop-versioning-11.c b/gcc/testsuite/gcc.dg/loop-versioning-11.c
new file mode 100644
index 00000000000..77ff484a4c2
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/loop-versioning-11.c
@@ -0,0 +1,29 @@
+/* { dg-options "-O3 -fdump-tree-lversion-details" } */
+
+/* Test that we don't try to version for something that is never 1. */
+
+void
+f1 (double *x, int stepx, int n)
+{
+ if (stepx == 1)
+ for (int i = 0; i < n; ++i)
+ x[i] = 100;
+ else
+ for (int i = 0; i < n; ++i)
+ x[stepx * i] = 100;
+}
+
+void
+f2 (double *x, int stepx, int n)
+{
+ if (stepx <= 1)
+ for (int i = 0; i < n; ++i)
+ x[i] = 100;
+ else
+ for (int i = 0; i < n; ++i)
+ x[stepx * i] = 100;
+}
+
+/* { dg-final { scan-tree-dump-times {want to version containing loop} 2 "lversion" } } */
+/* { dg-final { scan-tree-dump-times {can never be 1} 2 "lversion" } } */
+/* { dg-final { scan-tree-dump-not {versioned} "lversion" } } */
diff --git a/gcc/testsuite/gcc.dg/loop-versioning-12.c b/gcc/testsuite/gcc.dg/loop-versioning-12.c
new file mode 100644
index 00000000000..560acc4d80d
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/loop-versioning-12.c
@@ -0,0 +1,149 @@
+/* { dg-options "-O3 -fdump-tree-lversion-details" } */
+
+/* Test that we don't try to version for a step of 1 when that would
+ cause the iterations to overlap. */
+
+void
+f1 (unsigned short *x, int stepx, int n)
+{
+ for (int i = 0; i < n; ++i)
+ {
+ x[i * stepx] = 100;
+ x[i * stepx + 1] = 99;
+ }
+}
+
+void
+f2 (unsigned short *x, int stepx, int n)
+{
+ for (int i = 0; i < n; i += stepx)
+ {
+ x[i] = 100;
+ x[i + 1] = 99;
+ }
+}
+
+void
+f3 (unsigned short *x, int stepx, int n)
+{
+ for (int i = 0; i < n; ++i)
+ {
+ x[i * stepx - 16] = 100;
+ x[i * stepx - 15] = 99;
+ }
+}
+
+void
+f4 (unsigned short *x, int stepx, int n)
+{
+ for (int i = 0; i < n; i += stepx)
+ {
+ x[i - 16] = 100;
+ x[i - 15] = 99;
+ }
+}
+
+void
+f5 (unsigned short *x, int stepx, int n)
+{
+ for (int i = 0; i < n; ++i)
+ {
+ x[i * stepx - 16] = 100;
+ x[i * stepx + 15] = 99;
+ }
+}
+
+void
+f6 (unsigned short *x, int stepx, int n)
+{
+ for (int i = 0; i < n; i += stepx)
+ {
+ x[i - 16] = 100;
+ x[i + 15] = 99;
+ }
+}
+
+void
+f7 (unsigned short *x, int stepx, int n)
+{
+ for (unsigned short *y = x; y < x + n; y += stepx)
+ {
+ y[0] = 100;
+ y[1] = 99;
+ }
+}
+
+unsigned short x[1000];
+
+void
+g1 (int stepx, int n)
+{
+ for (int i = 0; i < n; ++i)
+ {
+ x[i * stepx] = 100;
+ x[i * stepx + 1] = 99;
+ }
+}
+
+void
+g2 (int stepx, int n)
+{
+ for (int i = 0; i < n; i += stepx)
+ {
+ x[i] = 100;
+ x[i + 1] = 99;
+ }
+}
+
+void
+g3 (int stepx, int n)
+{
+ for (int i = 0; i < n; ++i)
+ {
+ x[i * stepx - 16] = 100;
+ x[i * stepx - 15] = 99;
+ }
+}
+
+void
+g4 (int stepx, int n)
+{
+ for (int i = 0; i < n; i += stepx)
+ {
+ x[i - 16] = 100;
+ x[i - 15] = 99;
+ }
+}
+
+void
+g5 (int stepx, int n)
+{
+ for (int i = 0; i < n; ++i)
+ {
+ x[i * stepx - 16] = 100;
+ x[i * stepx + 15] = 99;
+ }
+}
+
+void
+g6 (int stepx, int n)
+{
+ for (int i = 0; i < n; i += stepx)
+ {
+ x[i - 16] = 100;
+ x[i + 15] = 99;
+ }
+}
+
+void
+g7 (int stepx, int n)
+{
+ for (unsigned short *y = x; y < x + n; y += stepx)
+ {
+ y[0] = 100;
+ y[1] = 99;
+ }
+}
+
+/* { dg-final { scan-tree-dump-not {want to version} "lversion" } } */
+/* { dg-final { scan-tree-dump-not {versioned} "lversion" } } */
diff --git a/gcc/testsuite/gcc.dg/loop-versioning-13.c b/gcc/testsuite/gcc.dg/loop-versioning-13.c
new file mode 100644
index 00000000000..c67da047fa6
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/loop-versioning-13.c
@@ -0,0 +1,109 @@
+/* { dg-options "-O3 -fdump-tree-lversion-details" } */
+
+/* Test that we do version for a step of 1 when that would lead the
+ iterations to access consecutive groups. */
+
+void
+f1 (unsigned short *x, int stepx, int n)
+{
+ for (int i = 0; i < n; ++i)
+ {
+ x[i * stepx * 2] = 100;
+ x[i * stepx * 2 + 1] = 99;
+ }
+}
+
+void
+f2 (unsigned short *x, int stepx, int n)
+{
+ for (int i = 0; i < n; i += stepx * 2)
+ {
+ x[i] = 100;
+ x[i + 1] = 99;
+ }
+}
+
+void
+f3 (unsigned short *x, int stepx, int n)
+{
+ for (int i = 0; i < n; ++i)
+ {
+ x[i * stepx * 2 - 16] = 100;
+ x[i * stepx * 2 - 15] = 99;
+ }
+}
+
+void
+f4 (unsigned short *x, int stepx, int n)
+{
+ for (int i = 0; i < n; i += stepx * 2)
+ {
+ x[i - 16] = 100;
+ x[i - 15] = 99;
+ }
+}
+
+void
+f5 (unsigned short *x, int stepx, int n)
+{
+ for (unsigned short *y = x; y < x + n; y += stepx * 2)
+ {
+ y[0] = 100;
+ y[1] = 99;
+ }
+}
+
+unsigned short x[1000];
+
+void
+g1 (int stepx, int n)
+{
+ for (int i = 0; i < n; ++i)
+ {
+ x[i * stepx * 2] = 100;
+ x[i * stepx * 2 + 1] = 99;
+ }
+}
+
+void
+g2 (int stepx, int n)
+{
+ for (int i = 0; i < n; i += stepx * 2)
+ {
+ x[i] = 100;
+ x[i + 1] = 99;
+ }
+}
+
+void
+g3 (int stepx, int n)
+{
+ for (int i = 0; i < n; ++i)
+ {
+ x[i * stepx * 2 - 16] = 100;
+ x[i * stepx * 2 - 15] = 99;
+ }
+}
+
+void
+g4 (int stepx, int n)
+{
+ for (int i = 0; i < n; i += stepx * 2)
+ {
+ x[i - 16] = 100;
+ x[i - 15] = 99;
+ }
+}
+
+void
+g5 (int stepx, int n)
+{
+ for (unsigned short *y = x; y < x + n; y += stepx * 2)
+ {
+ y[0] = 100;
+ y[1] = 99;
+ }
+}
+
+/* { dg-final { scan-tree-dump-times {want to version containing loop} 10 "lversion" } } */
+/* { dg-final { scan-tree-dump-times {versioned this loop} 10 "lversion" } } */
diff --git a/gcc/testsuite/gcc.dg/loop-versioning-14.c b/gcc/testsuite/gcc.dg/loop-versioning-14.c
new file mode 100644
index 00000000000..f2926e5650c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/loop-versioning-14.c
@@ -0,0 +1,149 @@
+/* { dg-options "-O3 -fdump-tree-lversion-details" } */
+
+/* Test that we don't try to version for a step of 1 when that would
+ cause the iterations to leave a gap between accesses. */
+
+void
+f1 (unsigned short *x, int stepx, int n)
+{
+ for (int i = 0; i < n; ++i)
+ {
+ x[i * stepx * 4] = 100;
+ x[i * stepx * 4 + 1] = 99;
+ }
+}
+
+void
+f2 (unsigned short *x, int stepx, int n)
+{
+ for (int i = 0; i < n; i += stepx * 4)
+ {
+ x[i] = 100;
+ x[i + 1] = 99;
+ }
+}
+
+void
+f3 (unsigned short *x, int stepx, int n)
+{
+ for (int i = 0; i < n; ++i)
+ {
+ x[i * stepx * 4 - 16] = 100;
+ x[i * stepx * 4 - 15] = 99;
+ }
+}
+
+void
+f4 (unsigned short *x, int stepx, int n)
+{
+ for (int i = 0; i < n; i += stepx * 4)
+ {
+ x[i - 16] = 100;
+ x[i - 15] = 99;
+ }
+}
+
+void
+f5 (unsigned short *x, int stepx, int n)
+{
+ for (int i = 0; i < n; ++i)
+ {
+ x[i * stepx * 64 - 16] = 100;
+ x[i * stepx * 64 + 15] = 99;
+ }
+}
+
+void
+f6 (unsigned short *x, int stepx, int n)
+{
+ for (int i = 0; i < n; i += stepx * 64)
+ {
+ x[i - 16] = 100;
+ x[i + 15] = 99;
+ }
+}
+
+void
+f7 (unsigned short *x, int stepx, int n)
+{
+ for (unsigned short *y = x; y < x + n; y += stepx * 4)
+ {
+ y[0] = 100;
+ y[1] = 99;
+ }
+}
+
+unsigned short x[1000];
+
+void
+g1 (int stepx, int n)
+{
+ for (int i = 0; i < n; ++i)
+ {
+ x[i * stepx * 4] = 100;
+ x[i * stepx * 4 + 1] = 99;
+ }
+}
+
+void
+g2 (int stepx, int n)
+{
+ for (int i = 0; i < n; i += stepx * 4)
+ {
+ x[i] = 100;
+ x[i + 1] = 99;
+ }
+}
+
+void
+g3 (int stepx, int n)
+{
+ for (int i = 0; i < n; ++i)
+ {
+ x[i * stepx * 4 - 16] = 100;
+ x[i * stepx * 4 - 15] = 99;
+ }
+}
+
+void
+g4 (int stepx, int n)
+{
+ for (int i = 0; i < n; i += stepx * 4)
+ {
+ x[i - 16] = 100;
+ x[i - 15] = 99;
+ }
+}
+
+void
+g5 (int stepx, int n)
+{
+ for (int i = 0; i < n; ++i)
+ {
+ x[i * stepx * 64 - 16] = 100;
+ x[i * stepx * 64 + 15] = 99;
+ }
+}
+
+void
+g6 (int stepx, int n)
+{
+ for (int i = 0; i < n; i += stepx * 64)
+ {
+ x[i - 16] = 100;
+ x[i + 15] = 99;
+ }
+}
+
+void
+g7 (int stepx, int n)
+{
+ for (unsigned short *y = x; y < x + n; y += stepx * 4)
+ {
+ y[0] = 100;
+ y[1] = 99;
+ }
+}
+
+/* { dg-final { scan-tree-dump-not {want to version} "lversion" } } */
+/* { dg-final { scan-tree-dump-not {versioned} "lversion" } } */
diff --git a/gcc/testsuite/gcc.dg/loop-versioning-2.c b/gcc/testsuite/gcc.dg/loop-versioning-2.c
new file mode 100644
index 00000000000..9416b540614
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/loop-versioning-2.c
@@ -0,0 +1,73 @@
+/* { dg-options "-O3 -fdump-tree-lversion-details" } */
+
+/* Versioning for step == 1 in these loops would allow loop interchange,
+ but otherwise isn't worthwhile. At the moment we decide not to version. */
+
+void
+f1 (double x[][100], int step, int n)
+{
+ for (int i = 0; i < n; ++i)
+ for (int j = 0; j < n; ++j)
+ x[j * step][i] = 100;
+}
+
+void
+f2 (double x[][100], int step, int n)
+{
+ for (int i = 0; i < n; ++i)
+ for (int j = 0; j < n; ++j)
+ x[j][i * step] = 100;
+}
+
+void
+f3 (double x[][100], int step, int limit)
+{
+ for (int i = 0; i < 100; ++i)
+ for (int j = 0; j < limit; j += step)
+ x[j][i] = 100;
+}
+
+void
+f4 (double x[][100], int step, int limit)
+{
+ for (int i = 0; i < limit; i += step)
+ for (int j = 0; j < 100; ++j)
+ x[j][i] = 100;
+}
+
+double x[100][100];
+
+void
+g1 (int step, int n)
+{
+ for (int i = 0; i < n; ++i)
+ for (int j = 0; j < n; ++j)
+ x[j * step][i] = 100;
+}
+
+void
+g2 (int step, int n)
+{
+ for (int i = 0; i < n; ++i)
+ for (int j = 0; j < n; ++j)
+ x[j][i * step] = 100;
+}
+
+void
+g3 (int step, int limit)
+{
+ for (int i = 0; i < 100; ++i)
+ for (int j = 0; j < limit; j += step)
+ x[j][i] = 100;
+}
+
+void
+g4 (int step, int limit)
+{
+ for (int i = 0; i < limit; i += step)
+ for (int j = 0; j < 100; ++j)
+ x[j][i] = 100;
+}
+
+/* { dg-final { scan-tree-dump-not {want to version} "lversion" } } */
+/* { dg-final { scan-tree-dump-not {versioned} "lversion" } } */
diff --git a/gcc/testsuite/gcc.dg/loop-versioning-3.c b/gcc/testsuite/gcc.dg/loop-versioning-3.c
new file mode 100644
index 00000000000..6565122cb1e
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/loop-versioning-3.c
@@ -0,0 +1,24 @@
+/* { dg-options "-O3 -fdump-tree-lversion-details" } */
+
+/* Versioning these loops for when both steps are 1 allows loop
+ interchange, but otherwise isn't worthwhile. At the moment we decide
+ not to version. */
+
+void
+f1 (double x[][100], int step1, int step2, int n)
+{
+ for (int i = 0; i < n; ++i)
+ for (int j = 0; j < n; ++j)
+ x[j * step1][i * step2] = 100;
+}
+
+void
+f2 (double x[][100], int step1, int step2, int limit)
+{
+ for (int i = 0; i < limit; i += step1)
+ for (int j = 0; j < limit; j += step2)
+ x[j][i] = 100;
+}
+
+/* { dg-final { scan-tree-dump-not {want to version} "lversion" } } */
+/* { dg-final { scan-tree-dump-not {versioned} "lversion" } } */
diff --git a/gcc/testsuite/gcc.dg/loop-versioning-4.c b/gcc/testsuite/gcc.dg/loop-versioning-4.c
new file mode 100644
index 00000000000..0ebb2b0e86f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/loop-versioning-4.c
@@ -0,0 +1,39 @@
+/* { dg-options "-O3 -fdump-tree-lversion-details" } */
+
+/* These shouldn't be versioned; it's extremely likely that the code
+ is emulating two-dimensional arrays. */
+
+void
+f1 (double *x, int step, int n)
+{
+ for (int i = 0; i < n; ++i)
+ for (int j = 0; j < n; ++j)
+ x[i * step + j] = 100;
+}
+
+void
+f2 (double *x, int step, int n)
+{
+ for (int i = 0; i < n; ++i)
+ for (int j = 0; j < n; ++j)
+ x[j * step + i] = 100;
+}
+
+void
+f3 (double *x, int *offsets, int step, int n)
+{
+ for (int i = 0; i < n; ++i)
+ for (int j = 0; j < n; ++j)
+ x[i * step + j + offsets[i]] = 100;
+}
+
+void
+f4 (double *x, int *offsets, int step, int n)
+{
+ for (int i = 0; i < n; ++i)
+ for (int j = 0; j < n; ++j)
+ x[j * step + i + offsets[i]] = 100;
+}
+
+/* { dg-final { scan-tree-dump-not {want to version} "lversion" } } */
+/* { dg-final { scan-tree-dump-not {versioned} "lversion" } } */
diff --git a/gcc/testsuite/gcc.dg/loop-versioning-5.c b/gcc/testsuite/gcc.dg/loop-versioning-5.c
new file mode 100644
index 00000000000..5d4cc2c3019
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/loop-versioning-5.c
@@ -0,0 +1,17 @@
+/* { dg-options "-O3 -fdump-tree-lversion-details" } */
+
+/* There's no information about whether STEP1 or STEP2 is innermost,
+ so we should assume the code is sensible and version for the inner
+ evolution, i.e. when STEP2 is 1. */
+
+void
+f1 (double *x, int step1, int step2, int n)
+{
+ for (int i = 0; i < n; ++i)
+ for (int j = 0; j < n; ++j)
+ x[i * step1 + j * step2] = 100;
+}
+
+/* { dg-final { scan-tree-dump-times {want to version containing loop for when step2} 1 "lversion" } } */
+/* { dg-final { scan-tree-dump-times {want to version containing loop} 1 "lversion" } } */
+/* { dg-final { scan-tree-dump-times {versioned this loop} 1 "lversion" } } */
diff --git a/gcc/testsuite/gcc.dg/loop-versioning-6.c b/gcc/testsuite/gcc.dg/loop-versioning-6.c
new file mode 100644
index 00000000000..e718c233502
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/loop-versioning-6.c
@@ -0,0 +1,31 @@
+/* { dg-options "-O3 -fdump-tree-lversion-details" } */
+
+/* The read from y in f1 will be hoisted to the outer loop. In general
+ it's not worth versioning outer loops when the inner loops don't also
+ benefit.
+
+ This test is meant to be a slight counterexample, since versioning
+ does lead to cheaper outer-loop vectorization. However, the benefit
+ isn't enough to justify the cost. */
+
+void
+f1 (double *restrict x, double *restrict y, int step, int n)
+{
+ for (int i = 0; i < n; ++i)
+ for (int j = 0; j < n; ++j)
+ x[i + j] = y[i * step];
+}
+
+/* A similar example in which the read can't be hoisted, but could
+ for example be handled by vectorizer alias checks. */
+
+void
+f2 (double *x, double *y, int step, int n)
+{
+ for (int i = 0; i < n; ++i)
+ for (int j = 0; j < n; ++j)
+ x[i + j] = y[i * step];
+}
+
+/* { dg-final { scan-tree-dump-not {want to version} "lversion" } } */
+/* { dg-final { scan-tree-dump-not {versioned} "lversion" } } */
diff --git a/gcc/testsuite/gcc.dg/loop-versioning-7.c b/gcc/testsuite/gcc.dg/loop-versioning-7.c
new file mode 100644
index 00000000000..ac0c2018795
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/loop-versioning-7.c
@@ -0,0 +1,32 @@
+/* { dg-options "-O3 -fdump-tree-lversion-details" } */
+
+/* Check that versioning can handle arrays of structures. */
+
+struct foo {
+ int a, b, c;
+};
+
+void
+f1 (struct foo *x, int stepx, int n)
+{
+ for (int i = 0; i < n; ++i)
+ {
+ x[stepx * i].a = 1;
+ x[stepx * i].b = 2;
+ x[stepx * i].c = 3;
+ }
+}
+
+void
+f2 (struct foo *x, int stepx, int limit)
+{
+ for (int i = 0; i < limit; i += stepx)
+ {
+ x[i].a = 1;
+ x[i].b = 2;
+ x[i].c = 3;
+ }
+}
+
+/* { dg-final { scan-tree-dump-times {want to version containing loop} 2 "lversion" } } */
+/* { dg-final { scan-tree-dump-times {versioned this loop} 2 "lversion" } } */
diff --git a/gcc/testsuite/gcc.dg/loop-versioning-8.c b/gcc/testsuite/gcc.dg/loop-versioning-8.c
new file mode 100644
index 00000000000..5645b13dc1a
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/loop-versioning-8.c
@@ -0,0 +1,43 @@
+/* { dg-options "-O3 -fdump-tree-lversion-details" } */
+
+/* Versioning for step == 1 in these loops would allow loop interchange,
+ but otherwise isn't worthwhile. At the moment we decide not to version. */
+
+struct foo {
+ int a[100];
+};
+
+void
+f1 (struct foo *x, int step, int n)
+{
+ for (int i = 0; i < n; ++i)
+ for (int j = 0; j < n; ++j)
+ x[j * step].a[i] = 100;
+}
+
+void
+f2 (struct foo *x, int step, int n)
+{
+ for (int i = 0; i < n; ++i)
+ for (int j = 0; j < n; ++j)
+ x[j].a[i * step] = 100;
+}
+
+void
+f3 (struct foo *x, int step, int limit)
+{
+ for (int i = 0; i < 100; ++i)
+ for (int j = 0; j < limit; j += step)
+ x[j].a[i] = 100;
+}
+
+void
+f4 (struct foo *x, int step, int limit)
+{
+ for (int i = 0; i < limit; i += step)
+ for (int j = 0; j < 100; ++j)
+ x[j].a[i] = 100;
+}
+
+/* { dg-final { scan-tree-dump-not {want to version} "lversion" } } */
+/* { dg-final { scan-tree-dump-not {versioned} "lversion" } } */
diff --git a/gcc/testsuite/gcc.dg/loop-versioning-9.c b/gcc/testsuite/gcc.dg/loop-versioning-9.c
new file mode 100644
index 00000000000..cfbcd821b22
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/loop-versioning-9.c
@@ -0,0 +1,48 @@
+/* { dg-options "-O3 -fdump-tree-lversion-details" } */
+
+/* Check that versioning can handle small groups of accesses. */
+
+void
+f1 (int *x, int *y, int step, int n)
+{
+ for (int i = 0; i < n; ++i)
+ x[i] = y[i * step * 2] + y[i * step * 2 + 1];
+}
+
+void
+f2 (int *x, int *y, __INTPTR_TYPE__ step, int n)
+{
+ for (int i = 0; i < n; ++i)
+ x[i] = y[i * step * 2] + y[i * step * 2 + 1];
+}
+
+void
+f3 (int *x, int *y, int step, int n)
+{
+ for (int i = 0; i < n; ++i)
+ x[i] = y[i * step * 3] + y[i * step * 3 + 2];
+}
+
+void
+f4 (int *x, int *y, __INTPTR_TYPE__ step, int n)
+{
+ for (int i = 0; i < n; ++i)
+ x[i] = y[i * step * 3] + y[i * step * 3 + 2];
+}
+
+void
+f5 (int *x, int *y, int step, int n)
+{
+ for (int i = 0; i < n; ++i)
+ x[i] = y[i * step * 4] + y[i * step * 4 + 3];
+}
+
+void
+f6 (int *x, int *y, __INTPTR_TYPE__ step, int n)
+{
+ for (int i = 0; i < n; ++i)
+ x[i] = y[i * step * 4] + y[i * step * 4 + 3];
+}
+
+/* { dg-final { scan-tree-dump-times {want to version containing loop} 6 "lversion" } } */
+/* { dg-final { scan-tree-dump-times {versioned this loop} 6 "lversion" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/slp-43.c b/gcc/testsuite/gcc.dg/vect/slp-43.c
index 880a2961ae1..0344cc98625 100644
--- a/gcc/testsuite/gcc.dg/vect/slp-43.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-43.c
@@ -1,5 +1,5 @@
/* { dg-require-effective-target vect_int } */
-/* { dg-additional-options "-O3" } */
+/* { dg-additional-options "-O3 -fno-version-loops-for-strides" } */
#include <string.h>
#include "tree-vect.h"
diff --git a/gcc/testsuite/gcc.dg/vect/slp-45.c b/gcc/testsuite/gcc.dg/vect/slp-45.c
index 20a9f815292..d83472ca095 100644
--- a/gcc/testsuite/gcc.dg/vect/slp-45.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-45.c
@@ -1,5 +1,5 @@
/* { dg-require-effective-target vect_int } */
-/* { dg-additional-options "-O3" } */
+/* { dg-additional-options "-O3 -fno-version-loops-for-strides" } */
#include <string.h>
#include "tree-vect.h"
diff --git a/gcc/testsuite/gfortran.dg/loop_versioning_1.f90 b/gcc/testsuite/gfortran.dg/loop_versioning_1.f90
new file mode 100644
index 00000000000..e80f8920d00
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/loop_versioning_1.f90
@@ -0,0 +1,28 @@
+! { dg-options "-O3 -fdump-tree-lversion-details" }
+
+! The simplest IV case.
+
+subroutine f1(x)
+ real :: x(:)
+ x(:) = 100
+end subroutine f1
+
+subroutine f2(x, n, step)
+ integer :: n, step
+ real :: x(n * step)
+ do i = 1, n
+ x(i * step) = 100
+ end do
+end subroutine f2
+
+subroutine f3(x, limit, step)
+ integer :: limit, step
+ real :: x(limit)
+ do i = 1, limit, step
+ x(i) = 100
+ end do
+end subroutine f3
+
+! { dg-final { scan-tree-dump-times {likely to be the innermost dimension} 1 "lversion" } }
+! { dg-final { scan-tree-dump-times {want to version containing loop} 3 "lversion" } }
+! { dg-final { scan-tree-dump-times {versioned this loop} 3 "lversion" } }
diff --git a/gcc/testsuite/gfortran.dg/loop_versioning_2.f90 b/gcc/testsuite/gfortran.dg/loop_versioning_2.f90
new file mode 100644
index 00000000000..522ef912947
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/loop_versioning_2.f90
@@ -0,0 +1,39 @@
+! { dg-options "-O3 -fdump-tree-lversion-details -fno-frontend-loop-interchange" }
+
+! We could version the loop for when the first dimension has a stride
+! of 1, but at present there's no real benefit. The gimple loop
+! interchange pass couldn't handle the versioned loop, and interchange
+! is instead done by the frontend (but disabled by the options above).
+
+subroutine f1(x)
+ real :: x(:, :)
+ do i = lbound(x, 1), ubound(x, 1)
+ do j = lbound(x, 2), ubound(x, 2)
+ x(i, j) = 100
+ end do
+ end do
+end subroutine f1
+
+subroutine f2(x, n, step)
+ integer :: n, step
+ real :: x(100, 100)
+ do i = 1, n
+ do j = 1, n
+ x(i * step, j) = 100
+ end do
+ end do
+end subroutine f2
+
+subroutine f3(x, n, step)
+ integer :: n, step
+ real :: x(n * step, n)
+ do i = 1, n
+ do j = 1, n
+ x(i * step, j) = 100
+ end do
+ end do
+end subroutine f3
+
+! { dg-final { scan-tree-dump-times {likely to be the innermost dimension} 1 "lversion" } }
+! { dg-final { scan-tree-dump-not {want to version} "lversion" } }
+! { dg-final { scan-tree-dump-not {versioned} "lversion" } }
diff --git a/gcc/testsuite/gfortran.dg/loop_versioning_3.f90 b/gcc/testsuite/gfortran.dg/loop_versioning_3.f90
new file mode 100644
index 00000000000..040b30b5148
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/loop_versioning_3.f90
@@ -0,0 +1,30 @@
+! { dg-options "-O3 -fdump-tree-lversion-details -fno-frontend-loop-interchange" }
+
+! Test a case in which the outer loop iterates over the inner dimension.
+! The options above prevent the frontend from interchanging the loops.
+
+subroutine f1(x, limit, step, n)
+ integer :: limit, step, n
+ real :: x(limit, n)
+ do i = 1, limit, step
+ do j = 1, n
+ x(i, j) = 100
+ end do
+ end do
+end subroutine f1
+
+subroutine f2(x, n, limit, step)
+ integer :: n, limit, step
+ real :: x(limit, n)
+ do i = 1, n
+ do j = 1, limit, step
+ x(j, i) = 100
+ end do
+ end do
+end subroutine f2
+
+! FIXME: The frontend doesn't give us enough information to tell which loop
+! is iterating over the innermost dimension, so we optimistically
+! assume the inner one is.
+! { dg-final { scan-tree-dump-not {want to version} "lversion" { xfail *-*-* } } }
+! { dg-final { scan-tree-dump-not {versioned} "lversion" { xfail *-*-* } } }
diff --git a/gcc/testsuite/gfortran.dg/loop_versioning_4.f90 b/gcc/testsuite/gfortran.dg/loop_versioning_4.f90
new file mode 100644
index 00000000000..2fc4d12c9d1
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/loop_versioning_4.f90
@@ -0,0 +1,95 @@
+! { dg-options "-O3 -fdump-tree-lversion-details -fno-frontend-loop-interchange" }
+
+! Test cases in which versioning is useful for a two-dimensional array.
+
+subroutine f1(x)
+ real :: x(:, :)
+ x(:, :) = 100
+end subroutine f1
+
+subroutine f2(x)
+ real :: x(:, :)
+ do i = lbound(x, 1), ubound(x, 1)
+ do j = lbound(x, 2), ubound(x, 2)
+ x(j, i) = 100
+ end do
+ end do
+end subroutine f2
+
+subroutine f3(x, n, step)
+ integer :: n, step
+ real :: x(100, 100)
+ do i = 1, n
+ do j = 1, n
+ x(j * step, i) = 100
+ end do
+ end do
+end subroutine f3
+
+subroutine f4(x, n, step)
+ integer :: n, step
+ real :: x(n * step, n)
+ do i = 1, n
+ do j = 1, n
+ x(j * step, i) = 100
+ end do
+ end do
+end subroutine f4
+
+subroutine f5(x, n, limit, step)
+ integer :: n, limit, step
+ real :: x(limit, n)
+ do i = 1, n
+ do j = 1, limit, step
+ x(j, i) = 100
+ end do
+ end do
+end subroutine f5
+
+subroutine f6(x, y)
+ real :: x(:, :), y(:)
+ do i = lbound(x, 1), ubound(x, 1)
+ do j = lbound(x, 2), ubound(x, 2)
+ x(j, i) = 100
+ end do
+ y(i) = 200
+ end do
+end subroutine f6
+
+subroutine f7(x, y, n, step)
+ integer :: n, step
+ real :: x(100, 100), y(100)
+ do i = 1, n
+ do j = 1, n
+ x(j * step, i) = 100
+ end do
+ y(i * step) = 200
+ end do
+end subroutine f7
+
+subroutine f8(x, y, n, step)
+ integer :: n, step
+ real :: x(n * step, n), y(n * step)
+ do i = 1, n
+ do j = 1, n
+ x(j * step, i) = 100
+ end do
+ y(i * step) = 200
+ end do
+end subroutine f8
+
+subroutine f9(x, n, limit, step)
+ integer :: n, limit, step
+ real :: x(limit, n), y(limit)
+ do i = 1, n
+ do j = 1, limit, step
+ x(j, i) = 100
+ end do
+ y(i) = 200
+ end do
+end subroutine f9
+
+! { dg-final { scan-tree-dump-times {likely to be the innermost dimension} 3 "lversion" } }
+! { dg-final { scan-tree-dump-times {want to version containing loop} 9 "lversion" } }
+! { dg-final { scan-tree-dump-times {hoisting check} 9 "lversion" } }
+! { dg-final { scan-tree-dump-times {versioned this loop} 9 "lversion" } }
diff --git a/gcc/testsuite/gfortran.dg/loop_versioning_5.f90 b/gcc/testsuite/gfortran.dg/loop_versioning_5.f90
new file mode 100644
index 00000000000..b08c7163382
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/loop_versioning_5.f90
@@ -0,0 +1,57 @@
+! { dg-options "-O3 -fdump-tree-lversion-details -fno-frontend-loop-interchange" }
+
+! Make sure that in a "badly nested" loop, we don't treat the inner loop
+! as iterating over the inner dimension with a variable stride.
+
+subroutine f1(x, n)
+ integer :: n
+ real :: x(100, 100)
+ do i = 1, n
+ do j = 1, n
+ x(i, j) = 100
+ end do
+ end do
+end subroutine f1
+
+subroutine f2(x, n, step)
+ integer :: n, step
+ real :: x(100, 100)
+ do i = 1, n
+ do j = 1, n
+ x(i, j * step) = 100
+ end do
+ end do
+end subroutine f2
+
+subroutine f3(x, n)
+ integer :: n
+ real :: x(n, n)
+ do i = 1, n
+ do j = 1, n
+ x(i, j) = 100
+ end do
+ end do
+end subroutine f3
+
+subroutine f4(x, n, step)
+ integer :: n, step
+ real :: x(n, n * step)
+ do i = 1, n
+ do j = 1, n
+ x(i, j * step) = 100
+ end do
+ end do
+end subroutine f4
+
+subroutine f5(x, n, limit, step)
+ integer :: n, limit, step
+ real :: x(n, limit)
+ do i = 1, n
+ do j = 1, limit, step
+ x(i, j) = 100
+ end do
+ end do
+end subroutine f5
+
+! { dg-final { scan-tree-dump-not {want to version} "lversion" } }
+! { dg-final { scan-tree-dump-not {versioned} "lversion" } }
diff --git a/gcc/testsuite/gfortran.dg/loop_versioning_6.f90 b/gcc/testsuite/gfortran.dg/loop_versioning_6.f90
new file mode 100644
index 00000000000..450a79c1fdf
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/loop_versioning_6.f90
@@ -0,0 +1,93 @@
+! { dg-options "-O3 -fdump-tree-lversion-details" }
+
+! Check that versioning can handle small groups of accesses.
+
+subroutine f1(x)
+ real :: x(:)
+ do i = lbound(x, 1), ubound(x, 1) / 2
+ x(i * 2) = 100
+ x(i * 2 + 1) = 101
+ end do
+end subroutine f1
+
+subroutine f2(x, n, step)
+ integer :: n, step
+ real :: x(n * step * 2)
+ do i = 1, n
+ x(i * step * 2) = 100
+ x(i * step * 2 + 1) = 101
+ end do
+end subroutine f2
+
+subroutine f3(x, limit, step)
+ integer :: limit, step
+ real :: x(limit * 2)
+ do i = 1, limit, step
+ x(i * 2) = 100
+ x(i * 2 + 1) = 101
+ end do
+end subroutine f3
+
+subroutine f4(x)
+ real :: x(:)
+ do i = lbound(x, 1), ubound(x, 1) / 3
+ x(i * 3) = 100
+ x(i * 3 + 1) = 101
+ x(i * 3 + 2) = 102
+ end do
+end subroutine f4
+
+subroutine f5(x, n, step)
+ integer :: n, step
+ real :: x(n * step * 3)
+ do i = 1, n
+ x(i * step * 3) = 100
+ x(i * step * 3 + 1) = 101
+ x(i * step * 3 + 2) = 102
+ end do
+end subroutine f5
+
+subroutine f6(x, limit, step)
+ integer :: limit, step
+ real :: x(limit * 3)
+ do i = 1, limit, step
+ x(i * 3) = 100
+ x(i * 3 + 1) = 101
+ x(i * 3 + 2) = 102
+ end do
+end subroutine f6
+
+subroutine f7(x)
+ real :: x(:)
+ do i = lbound(x, 1), ubound(x, 1) / 4
+ x(i * 4) = 100
+ x(i * 4 + 1) = 101
+ x(i * 4 + 2) = 102
+ x(i * 4 + 3) = 103
+ end do
+end subroutine f7
+
+subroutine f8(x, n, step)
+ integer :: n, step
+ real :: x(n * step * 4)
+ do i = 1, n
+ x(i * step * 4) = 100
+ x(i * step * 4 + 1) = 101
+ x(i * step * 4 + 2) = 102
+ x(i * step * 4 + 3) = 103
+ end do
+end subroutine f8
+
+subroutine f9(x, limit, step)
+ integer :: limit, step
+ real :: x(limit * 4)
+ do i = 1, limit, step
+ x(i * 4) = 100
+ x(i * 4 + 1) = 101
+ x(i * 4 + 2) = 102
+ x(i * 4 + 3) = 103
+ end do
+end subroutine f9
+
+! { dg-final { scan-tree-dump-times {want to version containing loop} 9 "lversion" } }
+! { dg-final { scan-tree-dump-times {versioned this loop} 9 "lversion" } }
diff --git a/gcc/testsuite/gfortran.dg/loop_versioning_7.f90 b/gcc/testsuite/gfortran.dg/loop_versioning_7.f90
new file mode 100644
index 00000000000..ba827ac3184
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/loop_versioning_7.f90
@@ -0,0 +1,67 @@
+! { dg-options "-O3 -fdump-tree-lversion-details" }
+
+! Check that versioning can handle small groups of accesses, with the
+! group being a separate array dimension.
+
+subroutine f1(x, n, step)
+ integer :: n, step
+ real :: x(2, n * step)
+ do i = 1, n
+ x(1, i * step) = 100
+ x(2, i * step) = 101
+ end do
+end subroutine f1
+
+subroutine f2(x, limit, step)
+ integer :: limit, step
+ real :: x(2, limit)
+ do i = 1, limit, step
+ x(1, i) = 100
+ x(2, i) = 101
+ end do
+end subroutine f2
+
+subroutine f3(x, n, step)
+ integer :: n, step
+ real :: x(3, n * step)
+ do i = 1, n
+ x(1, i * step) = 100
+ x(2, i * step) = 101
+ x(3, i * step) = 102
+ end do
+end subroutine f3
+
+subroutine f4(x, limit, step)
+ integer :: limit, step
+ real :: x(3, limit)
+ do i = 1, limit, step
+ x(1, i) = 100
+ x(2, i) = 101
+ x(3, i) = 102
+ end do
+end subroutine f4
+
+subroutine f5(x, n, step)
+ integer :: n, step
+ real :: x(4, n * step)
+ do i = 1, n
+ x(1, i * step) = 100
+ x(2, i * step) = 101
+ x(3, i * step) = 102
+ x(4, i * step) = 103
+ end do
+end subroutine f5
+
+subroutine f6(x, limit, step)
+ integer :: limit, step
+ real :: x(4, limit)
+ do i = 1, limit, step
+ x(1, i) = 100
+ x(2, i) = 101
+ x(3, i) = 102
+ x(4, i) = 103
+ end do
+end subroutine f6
+
+! { dg-final { scan-tree-dump-times {want to version containing loop} 6 "lversion" } }
+! { dg-final { scan-tree-dump-times {versioned this loop} 6 "lversion" } }
diff --git a/gcc/testsuite/gfortran.dg/loop_versioning_8.f90 b/gcc/testsuite/gfortran.dg/loop_versioning_8.f90
new file mode 100644
index 00000000000..193479935f4
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/loop_versioning_8.f90
@@ -0,0 +1,13 @@
+! { dg-options "-O3 -fdump-tree-lversion-details" }
+
+! Check that versioning is applied to a gather-like reduction operation.
+
+function f(x, index, n)
+ integer :: n
+ real :: x(:)
+ integer :: index(n)
+ f = sum(x(index(:)))
+end function f
+
+! { dg-final { scan-tree-dump-times {want to version containing loop} 1 "lversion" } }
+! { dg-final { scan-tree-dump-times {versioned this loop} 1 "lversion" } }
diff --git a/gcc/timevar.def b/gcc/timevar.def
index 91221ae5b0e..033b48040bb 100644
--- a/gcc/timevar.def
+++ b/gcc/timevar.def
@@ -234,6 +234,7 @@ DEFTIMEVAR (TV_DSE1 , "dead store elim1")
DEFTIMEVAR (TV_DSE2 , "dead store elim2")
DEFTIMEVAR (TV_LOOP , "loop analysis")
DEFTIMEVAR (TV_LOOP_INIT , "loop init")
+DEFTIMEVAR (TV_LOOP_VERSIONING , "loop versioning")
DEFTIMEVAR (TV_LOOP_MOVE_INVARIANTS , "loop invariant motion")
DEFTIMEVAR (TV_LOOP_UNROLL , "loop unrolling")
DEFTIMEVAR (TV_LOOP_DOLOOP , "loop doloop")
diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h
index b20d34c15e9..9f9d85fdbc3 100644
--- a/gcc/tree-pass.h
+++ b/gcc/tree-pass.h
@@ -362,6 +362,7 @@ extern gimple_opt_pass *make_pass_fix_loops (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_tree_loop (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_tree_no_loop (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_tree_loop_init (gcc::context *ctxt);
+extern gimple_opt_pass *make_pass_loop_versioning (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_lim (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_linterchange (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_tree_unswitch (gcc::context *ctxt);
diff --git a/gcc/tree-ssa-propagate.c b/gcc/tree-ssa-propagate.c
index 1f3ccafa00e..8225ebee49e 100644
--- a/gcc/tree-ssa-propagate.c
+++ b/gcc/tree-ssa-propagate.c
@@ -1154,6 +1154,10 @@ substitute_and_fold_dom_walker::before_dom_children (basic_block bb)
/* Perform final substitution and folding of propagated values.
+ Process the whole function if BLOCK is null, otherwise only
+ process the blocks that BLOCK dominates. In the latter case,
+ it is the caller's responsibility to ensure that dominator
+ information is available and up-to-date.
PROP_VALUE[I] contains the single value that should be substituted
at every use of SSA name N_I. If PROP_VALUE is NULL, no values are
@@ -1170,16 +1174,24 @@ substitute_and_fold_dom_walker::before_dom_children (basic_block bb)
Return TRUE when something changed. */
bool
-substitute_and_fold_engine::substitute_and_fold (void)
+substitute_and_fold_engine::substitute_and_fold (basic_block block)
{
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, "\nSubstituting values and folding statements\n\n");
memset (&prop_stats, 0, sizeof (prop_stats));
- calculate_dominance_info (CDI_DOMINATORS);
+ /* Don't call calculate_dominance_info when iterating over a subgraph.
+ Callers that are using the interface this way are likely to want to
+ iterate over several disjoint subgraphs, and it would be expensive
+ in enable-checking builds to revalidate the whole dominance tree
+ each time. */
+ if (block)
+ gcc_assert (dom_info_state (CDI_DOMINATORS));
+ else
+ calculate_dominance_info (CDI_DOMINATORS);
substitute_and_fold_dom_walker walker (CDI_DOMINATORS, this);
- walker.walk (ENTRY_BLOCK_PTR_FOR_FN (cfun));
+ walker.walk (block ? block : ENTRY_BLOCK_PTR_FOR_FN (cfun));
/* We cannot remove stmts during the BB walk, especially not release
SSA names there as that destroys the lattice of our callers.
diff --git a/gcc/tree-ssa-propagate.h b/gcc/tree-ssa-propagate.h
index 56e1b1c1379..73512f65f28 100644
--- a/gcc/tree-ssa-propagate.h
+++ b/gcc/tree-ssa-propagate.h
@@ -104,7 +104,7 @@ class substitute_and_fold_engine
virtual bool fold_stmt (gimple_stmt_iterator *) { return false; }
virtual tree get_value (tree) { return NULL_TREE; }
- bool substitute_and_fold (void);
+ bool substitute_and_fold (basic_block = NULL);
bool replace_uses_in (gimple *);
bool replace_phi_args_in (gphi *);
};
diff --git a/gcc/tree-vrp.c b/gcc/tree-vrp.c
index 864de4195da..d4470d2a6b3 100644
--- a/gcc/tree-vrp.c
+++ b/gcc/tree-vrp.c
@@ -1173,15 +1173,14 @@ value_inside_range (tree val, tree min, tree max)
}
-/* Return TRUE if *VR includes the value zero. */
+/* Return TRUE if *VR includes the value X. */
bool
-range_includes_zero_p (const value_range_base *vr)
+range_includes_p (const value_range_base *vr, HOST_WIDE_INT x)
{
if (vr->varying_p () || vr->undefined_p ())
return true;
- tree zero = build_int_cst (vr->type (), 0);
- return vr->may_contain_p (zero);
+ return vr->may_contain_p (build_int_cst (vr->type (), x));
}
/* If *VR has a value range that is a single constant value return that,
diff --git a/gcc/tree-vrp.h b/gcc/tree-vrp.h
index de3221e401c..aaf024423e6 100644
--- a/gcc/tree-vrp.h
+++ b/gcc/tree-vrp.h
@@ -243,7 +243,7 @@ struct assert_info
extern void register_edge_assert_for (tree, edge, enum tree_code,
tree, tree, vec<assert_info> &);
extern bool stmt_interesting_for_vrp (gimple *);
-extern bool range_includes_zero_p (const value_range_base *);
+extern bool range_includes_p (const value_range_base *, HOST_WIDE_INT);
extern bool infer_value_range (gimple *, tree, tree_code *, tree *);
extern bool vrp_bitmap_equal_p (const_bitmap, const_bitmap);
@@ -285,4 +285,12 @@ extern tree get_single_symbol (tree, bool *, tree *);
extern void maybe_set_nonzero_bits (edge, tree);
extern value_range_kind determine_value_range (tree, wide_int *, wide_int *);
+/* Return TRUE if *VR includes the value zero. */
+
+inline bool
+range_includes_zero_p (const value_range_base *vr)
+{
+ return range_includes_p (vr, 0);
+}
+
#endif /* GCC_TREE_VRP_H */