summaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authordorit <dorit@138bc75d-0d04-0410-961f-82ee72b054a4>2004-11-19 19:39:40 +0000
committerdorit <dorit@138bc75d-0d04-0410-961f-82ee72b054a4>2004-11-19 19:39:40 +0000
commit53a6f6db574a5fcc0100914fd2d33078802108a8 (patch)
tree13edb3a9ae038b16cd0283198b9667757f2ba2fb /gcc
parent84d204b590caff145ec304547a260e55d90b5c48 (diff)
downloadgcc-53a6f6db574a5fcc0100914fd2d33078802108a8.tar.gz
PR tree-opt/18181
* tree-vectorizer.c (slpeel_tree_peel_loop_to_edge): Peeling scheme changed to suppoer uses-after-loop and to void creating flow paths that shouldn't exist. (slpeel_update_phi_nodes_for_guard): Takes additional two arguments. Modified to fit the new peeling scheme. Avoid quadratic behavior. (slpeel_add_loop_guard): Takes additional argument. (slpeel_verify_cfg_after_peeling): New function. (vect_update_ivs_after_vectorizer): Takes additional argument. Updated documentation. Use 'exit-bb' instead of creating 'new-bb'. (rename_variables_in_bb): Don't update phis for BBs out of loop, to fit the new peeling scheme. (copy_phi_nodes): Function removed. Its functionality moved to update_phis_for_duplicate_loop. (slpeel_update_phis_for_duplicate_loop): Functionality of copy_phi_nodes moved here. Added documentation. Modified to fit the new peeling scheme. (slpeel_make_loop_iterate_ntimes): Setting loop->single_exit not not needed - done in slpeel_tree_peel_loop_to_edge. (slpeel_tree_duplicate_loop_to_edge_cfg): Debug printouts compacted. (vect_do_peeling_for_loop_bound): Add documentation. Call slpeel_verify_cfg_after_peeling. Call vect_update_ivs_after_vectorizer with additional argument. (vect_do_peeling_for_alignment): Call slpeel_verify_cfg_after_peeling. (vect_finish_stmt_generation): Avoid 80 column oveflow. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@90932 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc')
-rw-r--r--gcc/ChangeLog28
-rw-r--r--gcc/testsuite/ChangeLog8
-rw-r--r--gcc/testsuite/gcc.dg/vect/vect-85.c48
-rw-r--r--gcc/testsuite/gcc.dg/vect/vect-86.c48
-rw-r--r--gcc/testsuite/gcc.dg/vect/vect-87.c52
-rw-r--r--gcc/testsuite/gcc.dg/vect/vect-88.c52
-rw-r--r--gcc/tree-vectorizer.c728
7 files changed, 651 insertions, 313 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index cf8c73bb24b..9026a1d7ec5 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,5 +1,33 @@
2004-11-19 Dorit Naishlos <dorit@il.ibm.com>
+ PR tree-opt/18181
+ * tree-vectorizer.c (slpeel_tree_peel_loop_to_edge): Peeling scheme
+ changed to suppoer uses-after-loop and to void creating flow paths
+ that shouldn't exist.
+ (slpeel_update_phi_nodes_for_guard): Takes additional two arguments.
+ Modified to fit the new peeling scheme. Avoid quadratic behavior.
+ (slpeel_add_loop_guard): Takes additional argument.
+ (slpeel_verify_cfg_after_peeling): New function.
+ (vect_update_ivs_after_vectorizer): Takes additional argument. Updated
+ documentation. Use 'exit-bb' instead of creating 'new-bb'.
+ (rename_variables_in_bb): Don't update phis for BBs out of loop, to fit
+ the new peeling scheme.
+ (copy_phi_nodes): Function removed. Its functionality moved to
+ update_phis_for_duplicate_loop.
+ (slpeel_update_phis_for_duplicate_loop): Functionality of copy_phi_nodes
+ moved here. Added documentation. Modified to fit the new peeling scheme.
+ (slpeel_make_loop_iterate_ntimes): Setting loop->single_exit not not
+ needed - done in slpeel_tree_peel_loop_to_edge.
+ (slpeel_tree_duplicate_loop_to_edge_cfg): Debug printouts compacted.
+ (vect_do_peeling_for_loop_bound): Add documentation. Call
+ slpeel_verify_cfg_after_peeling. Call vect_update_ivs_after_vectorizer
+ with additional argument.
+ (vect_do_peeling_for_alignment): Call slpeel_verify_cfg_after_peeling.
+
+ (vect_finish_stmt_generation): Avoid 80 column oveflow.
+
+2004-11-19 Dorit Naishlos <dorit@il.ibm.com>
+
* tree-vectorizer.c (slpeel_make_loop_iterate_ntimes): Last two
arguments removed.
(slpeel_tree_peel_loop_to_edge): Call slpeel_make_loop_iterate_ntimes
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index ebd0d42f612..b08e0561e76 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,11 @@
+2004-11-19 Dorit Naishlos <dorit@il.ibm.com>
+
+ PR tree-opt/18181
+ * gcc.dg/vect/vect-85.c: New test.
+ * gcc.dg/vect/vect-86.c: New test.
+ * gcc.dg/vect/vect-87.c: New test.
+ * gcc.dg/vect/vect-88.c: New test.
+
2004-11-19 Ben Elliston <bje@au.ibm.com>
* gcc.dg/pr16286.c: Test __pixel and __bool keywords.
diff --git a/gcc/testsuite/gcc.dg/vect/vect-85.c b/gcc/testsuite/gcc.dg/vect/vect-85.c
new file mode 100644
index 00000000000..26063e8f155
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-85.c
@@ -0,0 +1,48 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+#define N 16
+
+typedef int aint __attribute__ ((__aligned__(16)));
+
+int main1 (int *a)
+{
+ int i, j, k;
+ int b[N];
+
+ for (i = 0; i < N; i++)
+ {
+ for (j = 0; j < N; j++)
+ {
+ k = i + N;
+ a[j] = k;
+ }
+ b[i] = k;
+ }
+
+
+ for (j = 0; j < N; j++)
+ if (a[j] != i + N - 1)
+ abort();
+
+ for (j = 0; j < N; j++)
+ if (b[j] != j + N)
+ abort();
+
+ return 0;
+}
+
+int main (void)
+{
+ aint a[N];
+
+ check_vect ();
+
+ main1 (a);
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-86.c b/gcc/testsuite/gcc.dg/vect/vect-86.c
new file mode 100644
index 00000000000..9caa887b9fc
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-86.c
@@ -0,0 +1,48 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+#define N 16
+
+int main1 (int n)
+{
+ int i, j, k;
+ int a[N], b[N];
+
+ for (i = 0; i < n; i++)
+ {
+ for (j = 0; j < n; j++)
+ {
+ k = i + n;
+ a[j] = k;
+ }
+ b[i] = k;
+ }
+
+
+ for (j = 0; j < n; j++)
+ if (a[j] != i + n - 1)
+ abort();
+
+ for (i = 0; i < n; i++)
+ if (b[i] != i + n)
+ abort();
+
+ return 0;
+}
+
+int main (void)
+{
+ check_vect ();
+
+ main1 (N);
+ main1 (0);
+ main1 (1);
+ main1 (2);
+ main1 (N-1);
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-87.c b/gcc/testsuite/gcc.dg/vect/vect-87.c
new file mode 100644
index 00000000000..546178f8788
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-87.c
@@ -0,0 +1,52 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+#define N 16
+
+typedef int aint __attribute__ ((__aligned__(16)));
+
+int main1 (int n, int *a)
+{
+ int i, j, k;
+ int b[N];
+
+ for (i = 0; i < n; i++)
+ {
+ for (j = 0; j < n; j++)
+ {
+ k = i + n;
+ a[j] = k;
+ }
+ b[i] = k;
+ }
+
+
+ for (j = 0; j < n; j++)
+ if (a[j] != i + n - 1)
+ abort();
+
+ for (j = 0; j < n; j++)
+ if (b[j] != j + n)
+ abort();
+
+ return 0;
+}
+
+int main (void)
+{
+ aint a[N];
+
+ check_vect ();
+
+ main1 (N, a);
+ main1 (0, a);
+ main1 (1, a);
+ main1 (2, a);
+ main1 (N-1, a);
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-88.c b/gcc/testsuite/gcc.dg/vect/vect-88.c
new file mode 100644
index 00000000000..59984eaba42
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-88.c
@@ -0,0 +1,52 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+#define N 16
+
+typedef int aint __attribute__ ((__aligned__(16)));
+
+int main1 (int n, int *a)
+{
+ int i, j, k;
+ int b[N];
+
+ for (i = 0; i < n; i++)
+ {
+ for (j = 0; j < n; j++)
+ {
+ k = i + n;
+ a[j] = k;
+ }
+ b[i] = k;
+ }
+
+
+ for (j = 0; j < n; j++)
+ if (a[j] != i + n - 1)
+ abort();
+
+ for (j = 0; j < n; j++)
+ if (b[j] != j + n)
+ abort();
+
+ return 0;
+}
+
+int main (void)
+{
+ aint a[N+1];
+
+ check_vect ();
+
+ main1 (N, a+1);
+ main1 (0, a+1);
+ main1 (1, a+1);
+ main1 (2, a+1);
+ main1 (N-1, a+1);
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
diff --git a/gcc/tree-vectorizer.c b/gcc/tree-vectorizer.c
index a7cc59bd579..4430e18da66 100644
--- a/gcc/tree-vectorizer.c
+++ b/gcc/tree-vectorizer.c
@@ -164,10 +164,11 @@ static struct loop *slpeel_tree_duplicate_loop_to_edge_cfg
(struct loop *, struct loops *, edge);
static void slpeel_update_phis_for_duplicate_loop
(struct loop *, struct loop *, bool after);
-static void slpeel_update_phi_nodes_for_guard (edge, struct loop *);
+static void slpeel_update_phi_nodes_for_guard (edge, struct loop *, bool, bool);
static void slpeel_make_loop_iterate_ntimes (struct loop *, tree);
-static edge slpeel_add_loop_guard (basic_block, tree, basic_block);
+static edge slpeel_add_loop_guard (basic_block, tree, basic_block, basic_block);
static bool slpeel_can_duplicate_loop_p (struct loop *, edge);
+static void slpeel_verify_cfg_after_peeling (struct loop *, struct loop *);
static void allocate_new_names (bitmap);
static void rename_use_op (use_operand_p);
static void rename_def_op (def_operand_p, tree);
@@ -249,7 +250,7 @@ static void vect_finish_stmt_generation
static void vect_generate_tmps_on_preheader
(loop_vec_info, tree *, tree *, tree *);
static tree vect_build_loop_niters (loop_vec_info);
-static void vect_update_ivs_after_vectorizer (struct loop *, tree);
+static void vect_update_ivs_after_vectorizer (struct loop *, tree, edge);
static tree vect_gen_niters_for_prolog_loop (loop_vec_info, tree);
static void vect_update_inits_of_dr
(struct data_reference *, struct loop *, tree niters);
@@ -360,6 +361,7 @@ rename_variables_in_bb (basic_block bb)
unsigned i;
edge e;
edge_iterator ei;
+ struct loop *loop = bb->loop_father;
for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
rename_def_op (PHI_RESULT_PTR (phi), phi);
@@ -398,8 +400,12 @@ rename_variables_in_bb (basic_block bb)
}
FOR_EACH_EDGE (e, ei, bb->succs)
- for (phi = phi_nodes (e->dest); phi; phi = PHI_CHAIN (phi))
- rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e));
+ {
+ if (!flow_bb_inside_loop_p (loop, e->dest))
+ continue;
+ for (phi = phi_nodes (e->dest); phi; phi = PHI_CHAIN (phi))
+ rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e));
+ }
}
@@ -441,165 +447,204 @@ rename_variables_in_loop (struct loop *loop)
}
-/* This function copies phis from LOOP header to
- NEW_LOOP header. AFTER is as
- in update_phis_for_duplicate_loop function. */
-
-static void
-copy_phi_nodes (struct loop *loop, struct loop *new_loop,
- bool after)
-{
- tree phi, new_phi, def;
- edge new_e;
- edge e = (after ? loop_latch_edge (loop) : loop_preheader_edge (loop));
-
- /* Second add arguments to newly created phi nodes. */
- for (phi = phi_nodes (loop->header),
- new_phi = phi_nodes (new_loop->header);
- phi;
- phi = PHI_CHAIN (phi),
- new_phi = PHI_CHAIN (new_phi))
- {
- new_e = loop_preheader_edge (new_loop);
- def = PHI_ARG_DEF_FROM_EDGE (phi, e);
- add_phi_arg (&new_phi, def, new_e);
- }
-}
-
+/* Update the PHI nodes of NEW_LOOP.
-/* Update the PHI nodes of the NEW_LOOP. AFTER is true if the NEW_LOOP
- executes after LOOP, and false if it executes before it. */
+ NEW_LOOP is a duplicate of ORIG_LOOP.
+ AFTER indicates whether NEW_LOOP executes before or after ORIG_LOOP:
+ AFTER is true if NEW_LOOP executes after ORIG_LOOP, and false if it
+ executes before it. */
static void
-slpeel_update_phis_for_duplicate_loop (struct loop *loop,
+slpeel_update_phis_for_duplicate_loop (struct loop *orig_loop,
struct loop *new_loop, bool after)
{
- edge old_latch;
tree *new_name_ptr, new_ssa_name;
- tree phi_new, phi_old, def;
- edge orig_entry_e = loop_preheader_edge (loop);
+ tree phi_new, phi_orig;
+ tree def;
+ edge orig_loop_latch = loop_latch_edge (orig_loop);
+ edge orig_entry_e = loop_preheader_edge (orig_loop);
+ edge new_loop_exit_e = new_loop->exit_edges[0];
+ edge new_loop_entry_e = loop_preheader_edge (new_loop);
+ edge entry_arg_e = (after ? orig_loop_latch : orig_entry_e);
- /* Copy phis from loop->header to new_loop->header. */
- copy_phi_nodes (loop, new_loop, after);
+ /*
+ step 1. For each loop-header-phi:
+ Add the first phi argument for the phi in NEW_LOOP
+ (the one associated with the entry of NEW_LOOP)
+
+ step 2. For each loop-header-phi:
+ Add the second phi argument for the phi in NEW_LOOP
+ (the one associated with the latch of NEW_LOOP)
+
+ step 3. Update the phis in the successor block of NEW_LOOP.
+
+ case 1: NEW_LOOP was placed before ORIG_LOOP:
+ The successor block of NEW_LOOP is the header of ORIG_LOOP.
+ Updating the phis in the successor block can therefore be done
+ along with the scanning of the loop header phis, because the
+ header blocks of ORIG_LOOP and NEW_LOOP have exactly the same
+ phi nodes, organized in the same order.
+
+ case 2: NEW_LOOP was placed after ORIG_LOOP:
+ The successor block of NEW_LOOP is the original exit block of
+ ORIG_LOOP - the phis to be updated are the loop-closed-ssa phis.
+ We postpone updating these phis to a later stage (when
+ loop guards are added).
+ */
- old_latch = loop_latch_edge (loop);
- /* Update PHI args for the new loop latch edge, and
- the old loop preheader edge, we know that the PHI nodes
- are ordered appropriately in copy_phi_nodes. */
+ /* Scan the phis in the headers of the old and new loops
+ (they are organized in exactly the same order). */
+
for (phi_new = phi_nodes (new_loop->header),
- phi_old = phi_nodes (loop->header);
- phi_new && phi_old;
- phi_new = PHI_CHAIN (phi_new), phi_old = PHI_CHAIN (phi_old))
+ phi_orig = phi_nodes (orig_loop->header);
+ phi_new && phi_orig;
+ phi_new = PHI_CHAIN (phi_new), phi_orig = PHI_CHAIN (phi_orig))
{
- def = PHI_ARG_DEF_FROM_EDGE (phi_old, old_latch);
+ /* step 1. */
+ def = PHI_ARG_DEF_FROM_EDGE (phi_orig, entry_arg_e);
+ add_phi_arg (&phi_new, def, new_loop_entry_e);
+ /* step 2. */
+ def = PHI_ARG_DEF_FROM_EDGE (phi_orig, orig_loop_latch);
if (TREE_CODE (def) != SSA_NAME)
- continue;
+ continue;
new_name_ptr = SSA_NAME_AUX (def);
-
- /* Something defined outside of the loop. */
if (!new_name_ptr)
- continue;
+ /* Something defined outside of the loop. */
+ continue;
/* An ordinary ssa name defined in the loop. */
new_ssa_name = *new_name_ptr;
+ add_phi_arg (&phi_new, new_ssa_name, loop_latch_edge (new_loop));
- add_phi_arg (&phi_new, new_ssa_name, loop_latch_edge(new_loop));
-
- /* Update PHI args for the original loop pre-header edge. */
- if (! after)
- SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi_old, orig_entry_e),
- new_ssa_name);
+ /* step 3 (case 1). */
+ if (!after)
+ {
+ gcc_assert (new_loop_exit_e == orig_entry_e);
+ SET_PHI_ARG_DEF (phi_orig,
+ phi_arg_from_edge (phi_orig, new_loop_exit_e),
+ new_ssa_name);
+ }
}
}
/* Update PHI nodes for a guard of the LOOP.
- LOOP is supposed to have a preheader bb at which a guard condition is
- located. The true edge of this condition skips the LOOP and ends
- at the destination of the (unique) LOOP exit. The loop exit bb is supposed
- to be an empty bb (created by this transformation) with one successor.
-
- This function creates phi nodes at the LOOP exit bb. These phis need to be
- created as a result of adding true edge coming from guard.
-
- FORNOW: Only phis which have corresponding phi nodes at the header of the
- LOOP are created. Here we use the assumption that after the LOOP there
- are no uses of defs generated in LOOP.
-
- After the phis creation, the function updates the values of phi nodes at
- the LOOP exit successor bb:
-
- Original loop:
-
- bb0: loop preheader
- goto bb1
- bb1: loop header
- if (exit_cond) goto bb3 else goto bb2
- bb2: loop latch
- goto bb1
- bb3:
-
-
- After guard creation (the loop before this function):
-
- bb0: loop preheader
- if (guard_condition) goto bb4 else goto bb1
- bb1: loop header
- if (exit_cond) goto bb4 else goto bb2
- bb2: loop latch
- goto bb1
- bb4: loop exit
- (new empty bb)
- goto bb3
- bb3:
-
- This function updates the phi nodes in bb4 and in bb3, to account for the
- new edge from bb0 to bb4. */
+ Input:
+ - LOOP, GUARD_EDGE: LOOP is a loop for which we added guard code that
+ controls whether LOOP is to be executed. GUARD_EDGE is the edge that
+ originates from the guard-bb, skips LOOP and reaches the (unique) exit
+ bb of LOOP. This loop-exit-bb is an empty bb with one successor.
+ We denote this bb NEW_MERGE_BB because it had a single predecessor (the
+ LOOP header) before the guard code was added, and now it became a merge
+ point of two paths - the path that ends with the LOOP exit-edge, and
+ the path that ends with GUARD_EDGE.
+
+ This function creates and updates the relevant phi nodes to account for
+ the new incoming edge (GUARD_EDGE) into NEW_MERGE_BB:
+ 1. Create phi nodes at NEW_MERGE_BB.
+ 2. Update the phi nodes at the successor of NEW_MERGE_BB (denoted
+ UPDATE_BB). UPDATE_BB was the exit-bb of LOOP before NEW_MERGE_BB
+ was added:
+
+ ===> The CFG before the guard-code was added:
+ LOOP_header_bb:
+ if (exit_loop) goto update_bb : LOOP_header_bb
+ update_bb:
+
+ ==> The CFG after the guard-code was added:
+ guard_bb:
+ if (LOOP_guard_condition) goto new_merge_bb : LOOP_header_bb
+ LOOP_header_bb:
+ if (exit_loop_condition) goto new_merge_bb : LOOP_header_bb
+ new_merge_bb:
+ goto update_bb
+ update_bb:
+
+ - ENTRY_PHIS: If ENTRY_PHIS is TRUE, this indicates that the phis in
+ UPDATE_BB are loop entry phis, like the phis in the LOOP header,
+ organized in the same order.
+ If ENTRY_PHIs is FALSE, this indicates that the phis in UPDATE_BB are
+ loop exit phis.
+
+ - IS_NEW_LOOP: TRUE if LOOP is a new loop (a duplicated copy of another
+ "original" loop). FALSE if LOOP is an original loop (not a newly
+ created copy). The SSA_NAME_AUX fields of the defs in the origianl
+ loop are the corresponding new ssa-names used in the new duplicated
+ loop copy. IS_NEW_LOOP indicates which of the two args of the phi
+ nodes in UPDATE_BB takes the original ssa-name, and which takes the
+ new name: If IS_NEW_LOOP is TRUE, the phi-arg that is associated with
+ the LOOP-exit-edge takes the new-name, and the phi-arg that is
+ associated with GUARD_EDGE takes the original name. If IS_NEW_LOOP is
+ FALSE, it's the other way around.
+ */
static void
-slpeel_update_phi_nodes_for_guard (edge guard_true_edge, struct loop * loop)
+slpeel_update_phi_nodes_for_guard (edge guard_edge,
+ struct loop *loop,
+ bool entry_phis,
+ bool is_new_loop)
{
- tree phi, phi1;
- basic_block bb = loop->exit_edges[0]->dest;
-
- for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
- {
- tree new_phi;
- tree phi_arg;
-
- /* Generate new phi node. */
- new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (phi)), bb);
+ tree orig_phi, new_phi, update_phi;
+ tree guard_arg, loop_arg;
+ basic_block new_merge_bb = guard_edge->dest;
+ edge e = EDGE_SUCC (new_merge_bb, 0);
+ basic_block update_bb = e->dest;
+ basic_block orig_bb = (entry_phis ? loop->header : update_bb);
+
+ for (orig_phi = phi_nodes (orig_bb), update_phi = phi_nodes (update_bb);
+ orig_phi && update_phi;
+ orig_phi = PHI_CHAIN (orig_phi), update_phi = PHI_CHAIN (update_phi))
+ {
+ /* 1. Generate new phi node in NEW_MERGE_BB: */
+ new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
+ new_merge_bb);
+
+ /* 2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
+ of LOOP. Set the two phi args in NEW_PHI for these edges: */
+ if (entry_phis)
+ {
+ loop_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi,
+ EDGE_SUCC (loop->latch, 0));
+ guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, loop->entry_edges[0]);
+ }
+ else /* exit phis */
+ {
+ tree orig_def = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
+ tree *new_name_ptr = SSA_NAME_AUX (orig_def);
+ tree new_name;
+
+ if (new_name_ptr)
+ new_name = *new_name_ptr;
+ else
+ /* Something defined outside of the loop */
+ new_name = orig_def;
+
+ if (is_new_loop)
+ {
+ guard_arg = orig_def;
+ loop_arg = new_name;
+ }
+ else
+ {
+ guard_arg = new_name;
+ loop_arg = orig_def;
+ }
+ }
+ add_phi_arg (&new_phi, loop_arg, loop->exit_edges[0]);
+ add_phi_arg (&new_phi, guard_arg, guard_edge);
- /* Add argument coming from guard true edge. */
- phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, loop->entry_edges[0]);
- add_phi_arg (&new_phi, phi_arg, guard_true_edge);
+ /* 3. Update phi in successor block. */
+ gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == loop_arg
+ || PHI_ARG_DEF_FROM_EDGE (update_phi, e) == guard_arg);
+ SET_PHI_ARG_DEF (update_phi, phi_arg_from_edge (update_phi, e),
+ PHI_RESULT (new_phi));
+ }
- /* Add argument coming from loop exit edge. */
- phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, EDGE_SUCC (loop->latch, 0));
- add_phi_arg (&new_phi, phi_arg, loop->exit_edges[0]);
-
- /* Update all phi nodes at the loop exit successor. */
- for (phi1 = phi_nodes (EDGE_SUCC (bb, 0)->dest);
- phi1;
- phi1 = PHI_CHAIN (phi1))
- {
- tree old_arg = PHI_ARG_DEF_FROM_EDGE (phi1, EDGE_SUCC (bb, 0));
- if (old_arg == phi_arg)
- {
- edge e = EDGE_SUCC (bb, 0);
-
- SET_PHI_ARG_DEF (phi1,
- phi_arg_from_edge (phi1, e),
- PHI_RESULT (new_phi));
- }
- }
- }
-
- set_phi_nodes (bb, phi_reverse (phi_nodes (bb)));
+ set_phi_nodes (new_merge_bb, phi_reverse (phi_nodes (new_merge_bb)));
}
@@ -618,8 +663,6 @@ slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
tree begin_label = tree_block_label (loop->latch);
tree exit_label = tree_block_label (loop->single_exit->dest);
- /* Flow loop scan does not update loop->single_exit field. */
- loop->single_exit = loop->exit_edges[0];
orig_cond = get_loop_exit_condition (loop);
gcc_assert (orig_cond);
create_iv (integer_zero_node, integer_one_node, NULL_TREE, loop,
@@ -630,7 +673,6 @@ slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
bsi_next (&loop_exit_bsi);
gcc_assert (bsi_stmt (loop_exit_bsi) == orig_cond);
-
if (exit_edge->flags & EDGE_TRUE_VALUE) /* 'then' edge exits the loop. */
cond = build2 (GE_EXPR, boolean_type_node, indx_after_incr, niters);
else /* 'then' edge loops back. */
@@ -670,8 +712,7 @@ slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, struct loops *loops,
if (!at_exit && e != loop_preheader_edge (loop))
{
if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file,
- "Edge is not an entry nor an exit edge.\n");
+ fprintf (dump_file, "Edge is not an entry nor an exit edge.\n");
return NULL;
}
@@ -681,8 +722,7 @@ slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, struct loops *loops,
if (!can_copy_bbs_p (bbs, loop->num_nodes))
{
if (vect_debug_stats (loop) || vect_debug_details (loop))
- fprintf (dump_file,
- "Cannot copy basic blocks.\n");
+ fprintf (dump_file, "Cannot copy basic blocks.\n");
free (bbs);
return NULL;
}
@@ -692,8 +732,7 @@ slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, struct loops *loops,
if (!new_loop)
{
if (vect_debug_stats (loop) || vect_debug_details (loop))
- fprintf (dump_file,
- "The duplicate_loop returns NULL.\n");
+ fprintf (dump_file, "duplicate_loop returns NULL.\n");
free (bbs);
return NULL;
}
@@ -776,7 +815,8 @@ slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, struct loops *loops,
Returns the skip edge. */
static edge
-slpeel_add_loop_guard (basic_block guard_bb, tree cond, basic_block exit_bb)
+slpeel_add_loop_guard (basic_block guard_bb, tree cond, basic_block exit_bb,
+ basic_block dom_bb)
{
block_stmt_iterator bsi;
edge new_e, enter_e;
@@ -796,7 +836,7 @@ slpeel_add_loop_guard (basic_block guard_bb, tree cond, basic_block exit_bb)
bsi_insert_after (&bsi, cond_stmt, BSI_NEW_STMT);
/* Add new edge to connect entry block to the second loop. */
new_e = make_edge (guard_bb, exit_bb, EDGE_TRUE_VALUE);
- set_immediate_dominator (CDI_DOMINATORS, exit_bb, guard_bb);
+ set_immediate_dominator (CDI_DOMINATORS, exit_bb, dom_bb);
return new_e;
}
@@ -837,61 +877,77 @@ slpeel_can_duplicate_loop_p (struct loop *loop, edge e)
}
-/* Given LOOP this function duplicates it to the edge E.
-
- This transformation takes place before the loop is vectorized.
- For now, there are two main cases when it's used
- by the vectorizer: to support loops with unknown loop bounds
- (or loop bounds indivisible by vectorization factor) and to force the
- alignment of data references in the loop. In the first case, LOOP is
- duplicated to the exit edge, producing epilog loop. In the second case, LOOP
- is duplicated to the preheader edge thus generating prolog loop. In both
- cases, the original loop will be vectorized after the transformation.
-
- The edge E is supposed to be either preheader edge of the LOOP or
- its exit edge. If preheader edge is specified, the LOOP copy
- will precede the original one. Otherwise the copy will be located
- at the exit of the LOOP.
+static void
+slpeel_verify_cfg_after_peeling (struct loop *first_loop,
+ struct loop *second_loop)
+{
+ basic_block loop1_exit_bb = first_loop->exit_edges[0]->dest;
+ basic_block loop2_entry_bb = second_loop->pre_header;
+ basic_block loop1_entry_bb = loop_preheader_edge (first_loop)->src;
+
+ /* A guard that controls whether the second_loop is to be executed or skipped
+ is placed in first_loop->exit. first_loopt->exit therefore has two
+ successors - one is the preheader of second_loop, and the other is a bb
+ after second_loop.
+ */
+ gcc_assert (EDGE_COUNT (loop1_exit_bb->succs) == 2);
- FIRST_NITERS (SSA_NAME) parameter specifies how many times to iterate
- the first loop. If UPDATE_FIRST_LOOP_COUNT parameter is false, the first
- loop will be iterated FIRST_NITERS times by introducing additional
- induction variable and replacing loop exit condition. If
- UPDATE_FIRST_LOOP_COUNT is true no change to the first loop is made and
- the caller to tree_duplicate_loop_to_edge is responsible for updating
- the first loop count.
- NITERS (also SSA_NAME) parameter defines the number of iteration the
- original loop iterated. The function generates two if-then guards:
- one prior to the first loop and the other prior to the second loop.
- The first guard will be:
-
- if (FIRST_NITERS == 0) then skip the first loop
+ /* 1. Verify that one of the successors of first_loopt->exit is the preheader
+ of second_loop. */
- The second guard will be:
+ /* The preheader of new_loop is expected to have two predessors:
+ first_loop->exit and the block that precedes first_loop. */
+
+ gcc_assert (EDGE_COUNT (loop2_entry_bb->preds) == 2
+ && ((EDGE_PRED (loop2_entry_bb, 0)->src == loop1_exit_bb
+ && EDGE_PRED (loop2_entry_bb, 1)->src == loop1_entry_bb)
+ || (EDGE_PRED (loop2_entry_bb, 1)->src == loop1_exit_bb
+ && EDGE_PRED (loop2_entry_bb, 0)->src == loop1_entry_bb)));
+
+ /* Verify that the other successor of first_loopt->exit is after the
+ second_loop. */
+ /* TODO */
+}
- if (FIRST_NITERS == NITERS) then skip the second loop
- Thus the equivalence to the original code is guaranteed by correct values
- of NITERS and FIRST_NITERS and generation of if-then loop guards.
+/* Function slpeel_tree_peel_loop_to_edge.
- For now this function supports only loop forms that are candidate for
- vectorization. Such types are the following:
+ Peel the first (last) iterations of LOOP into a new prolog (epilog) loop
+ that is placed on the entry (exit) edge E of LOOP. After this transformation
+ we have two loops one after the other - first-loop iterates FIRST_NITERS
+ times, and second-loop iterates the remainder NITERS - FIRST_NITERS times.
- (1) only innermost loops
- (2) loops built from 2 basic blocks
- (3) loops with one entry and one exit
- (4) loops without function calls
- (5) loops without defs that are used after the loop
+ Input:
+ - LOOP: the loop to be peeled.
+ - E: the exit or entry edge of LOOP.
+ If it is the entry edge, we peel the first iterations of LOOP. In this
+ case first-loop is LOOP, and second-loop is the newly created loop.
+ If it is the exit edge, we peel the last iterations of LOOP. In this
+ case, first-loop is the newly created loop, and second-loop is LOOP.
+ - NITERS: the number of iterations that LOOP iterates.
+ - FIRST_NITERS: the number of iterations that the first-loop should iterate.
+ - UPDATE_FIRST_LOOP_COUNT: specified whether this function is responssible
+ for updating the loop bound of the first-loop to FIRST_NITERS. If it
+ is false, the caller of this function may want to take care of this
+ (this can be usefull is we don't want new stmts added to first-loop).
- (1), (3) are checked in this function; (2) - in function
- vect_analyze_loop_form; (4) - in function vect_analyze_data_refs;
- (5) is checked as part of the function vect_mark_stmts_to_be_vectorized,
- when excluding induction/reduction support.
+ Output:
+ The function returns a pointer to the new loop-copy, or NULL if it failed
+ to perform the trabsformation.
+
+ The function generates two if-then-else guards: one before the first loop,
+ and the other before the second loop:
+ The first guard is:
+ if (FIRST_NITERS == 0) then skip the first loop,
+ and go directly to the second loop.
+ The second guard is:
+ if (FIRST_NITERS == NITERS) then skip the second loop.
+
+ FORNOW only simple loops are supported (see slpeel_can_duplicate_loop_p).
+ FORNOW the resulting code will not be in loop-closed-ssa form.
+*/
- The function returns NULL in case one of these checks or
- transformations failed. */
-
struct loop*
slpeel_tree_peel_loop_to_edge (struct loop *loop, struct loops *loops,
edge e, tree first_niters,
@@ -901,117 +957,151 @@ slpeel_tree_peel_loop_to_edge (struct loop *loop, struct loops *loops,
edge skip_e;
tree pre_condition;
bitmap definitions;
- basic_block first_exit_bb, second_exit_bb;
- basic_block pre_header_bb;
+ basic_block bb_before_second_loop, bb_after_second_loop;
+ basic_block bb_before_first_loop;
+ basic_block bb_between_loops;
edge exit_e = loop->exit_edges [0];
-
+
if (!slpeel_can_duplicate_loop_p (loop, e))
return NULL;
-
- /* We have to initialize cfg_hooks. Then, when calling
+
+ /* We have to initialize cfg_hooks. Then, when calling
cfg_hooks->split_edge, the function tree_split_edge
- is actually called and, when calling cfg_hooks->duplicate_block,
+ is actually called and, when calling cfg_hooks->duplicate_block,
the function tree_duplicate_bb is called. */
tree_register_cfg_hooks ();
- /* 1. Generate a copy of LOOP and put it on E (entry or exit). */
+
+ /* 1. Generate a copy of LOOP and put it on E (E is the entry/exit of LOOP).
+ Resulting CFG would be:
+
+ first_loop:
+ do {
+ } while ...
+
+ second_loop:
+ do {
+ } while ...
+
+ orig_exit_bb:
+ */
+
if (!(new_loop = slpeel_tree_duplicate_loop_to_edge_cfg (loop, loops, e)))
{
- if (vect_debug_stats (loop) || vect_debug_details (loop))
- fprintf (dump_file,
- "The tree_duplicate_loop_to_edge_cfg failed.\n");
+ if (vect_debug_stats (loop) || vect_debug_details (loop))
+ fprintf (dump_file, "tree_duplicate_loop_to_edge_cfg failed.\n");
return NULL;
}
-
- definitions = marked_ssa_names ();
- allocate_new_names (definitions);
- slpeel_update_phis_for_duplicate_loop (loop, new_loop, e == exit_e);
- /* Here, using assumption (5), we do not propagate new names further
- than on phis of the exit from the second loop. */
- rename_variables_in_loop (new_loop);
- free_new_names (definitions);
-
+
if (e == exit_e)
{
+ /* NEW_LOOP was placed after LOOP. */
first_loop = loop;
second_loop = new_loop;
}
- else
+ else
{
+ /* NEW_LOOP was placed before LOOP. */
first_loop = new_loop;
second_loop = loop;
}
- /* 2. Generate bb between the loops. */
- first_exit_bb = split_edge (first_loop->exit_edges[0]);
- add_bb_to_loop (first_exit_bb, first_loop->outer);
+ definitions = marked_ssa_names ();
+ allocate_new_names (definitions);
+ slpeel_update_phis_for_duplicate_loop (loop, new_loop, e == exit_e);
+ rename_variables_in_loop (new_loop);
+
- /* We need to update here first loop exit edge
- and second loop preheader edge. */
- flow_loop_scan (first_loop, LOOP_ALL);
- flow_loop_scan (second_loop, LOOP_ALL);
- /* Flow loop scan does not update loop->single_exit field. */
- first_loop->single_exit = first_loop->exit_edges[0];
- second_loop->single_exit = second_loop->exit_edges[0];
+ /* 2. Add the guard that controls whether the first loop is executed.
+ Resulting CFG would be:
- /* 3. Make first loop iterate FIRST_NITERS times, if needed. */
- if (!update_first_loop_count)
- slpeel_make_loop_iterate_ntimes (first_loop, first_niters);
-
- /* 4. Add the guard before first loop:
+ bb_before_first_loop:
+ if (FIRST_NITERS == 0) GOTO bb_before_second_loop
+ GOTO first-loop
+
+ first_loop:
+ do {
+ } while ...
- if FIRST_NITERS == 0
- skip first loop
- else
- enter first loop */
+ bb_before_second_loop:
- /* 4a. Generate bb before first loop. */
- pre_header_bb = split_edge (loop_preheader_edge (first_loop));
- add_bb_to_loop (pre_header_bb, first_loop->outer);
+ second_loop:
+ do {
+ } while ...
- /* First loop preheader edge is changed. */
+ orig_exit_bb:
+ */
+
+ bb_before_first_loop = split_edge (loop_preheader_edge (first_loop));
+ add_bb_to_loop (bb_before_first_loop, first_loop->outer);
+ bb_before_second_loop = split_edge (first_loop->exit_edges[0]);
+ add_bb_to_loop (bb_before_second_loop, first_loop->outer);
flow_loop_scan (first_loop, LOOP_ALL);
+ flow_loop_scan (second_loop, LOOP_ALL);
+
+ pre_condition =
+ build (LE_EXPR, boolean_type_node, first_niters, integer_zero_node);
+ skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition,
+ bb_before_second_loop, bb_before_first_loop);
+ slpeel_update_phi_nodes_for_guard (skip_e, first_loop, true /* entry-phis */,
+ first_loop == new_loop);
+
- /* 4b. Generate guard condition. */
- pre_condition = build (LE_EXPR, boolean_type_node,
- first_niters, integer_zero_node);
+ /* 3. Add the guard that controls whether the second loop is executed.
+ Resulting CFG would be:
- /* 4c. Add condition at the end of preheader bb. */
- skip_e = slpeel_add_loop_guard (pre_header_bb, pre_condition, first_exit_bb);
+ bb_before_first_loop:
+ if (FIRST_NITERS == 0) GOTO bb_before_second_loop (skip first loop)
+ GOTO first-loop
- /* 4d. Update phis at first loop exit and propagate changes
- to the phis of second loop. */
- slpeel_update_phi_nodes_for_guard (skip_e, first_loop);
+ first_loop:
+ do {
+ } while ...
- /* 5. Add the guard before second loop:
+ bb_between_loops:
+ if (FIRST_NITERS == NITERS) GOTO bb_after_second_loop (skip second loop)
+ GOTO bb_before_second_loop
- if FIRST_NITERS == NITERS SKIP
- skip second loop
- else
- enter second loop */
+ bb_before_second_loop:
- /* 5a. Generate empty bb at the exit from the second loop. */
- second_exit_bb = split_edge (second_loop->exit_edges[0]);
- add_bb_to_loop (second_exit_bb, second_loop->outer);
+ second_loop:
+ do {
+ } while ...
- /* Second loop preheader edge is changed. */
+ bb_after_second_loop:
+
+ orig_exit_bb:
+ */
+
+ bb_between_loops = split_edge (first_loop->exit_edges[0]);
+ add_bb_to_loop (bb_between_loops, first_loop->outer);
+ bb_after_second_loop = split_edge (second_loop->exit_edges[0]);
+ add_bb_to_loop (bb_after_second_loop, second_loop->outer);
+ flow_loop_scan (first_loop, LOOP_ALL);
flow_loop_scan (second_loop, LOOP_ALL);
- /* 5b. Generate guard condition. */
- pre_condition = build (EQ_EXPR, boolean_type_node,
- first_niters, niters);
+ pre_condition = build (EQ_EXPR, boolean_type_node, first_niters, niters);
+ skip_e = slpeel_add_loop_guard (bb_between_loops, pre_condition,
+ bb_after_second_loop, bb_before_first_loop);
+ slpeel_update_phi_nodes_for_guard (skip_e, second_loop, false /* exit-phis */,
+ second_loop == new_loop);
- /* 5c. Add condition at the end of preheader bb. */
- skip_e = slpeel_add_loop_guard (first_exit_bb, pre_condition, second_exit_bb);
- slpeel_update_phi_nodes_for_guard (skip_e, second_loop);
+ /* Flow loop scan does not update loop->single_exit field. */
+ first_loop->single_exit = first_loop->exit_edges[0];
+ second_loop->single_exit = second_loop->exit_edges[0];
+ /* 4. Make first-loop iterate FIRST_NITERS times, if requested.
+ */
+ if (update_first_loop_count)
+ slpeel_make_loop_iterate_ntimes (first_loop, first_niters);
+
+ free_new_names (definitions);
BITMAP_XFREE (definitions);
unmark_all_for_rewrite ();
-
+
return new_loop;
}
-
/* Here the proper Vectorizer starts. */
@@ -2094,7 +2184,8 @@ vect_finish_stmt_generation (tree stmt, tree vec_stmt, block_stmt_iterator *bsi)
/* Make sure bsi points to the stmt that is being vectorized. */
/* Assumption: any stmts created for the vectorization of stmt S were
- inserted before S. BSI is expected to point to S or some new stmt before S. */
+ inserted before S. BSI is expected to point to S or some new stmt before S.
+ */
while (stmt != bsi_stmt (*bsi) && !bsi_end_p (*bsi))
bsi_next (bsi);
@@ -2837,24 +2928,13 @@ vect_transform_loop_bound (loop_vec_info loop_vinfo, tree niters)
of LOOP were peeled.
- NITERS - the number of iterations that LOOP executes (before it is
vectorized). i.e, the number of times the ivs should be bumped.
+ - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
+ coming out from LOOP on which there are uses of the LOOP ivs
+ (this is the path from LOOP->exit to epilog_loop->preheader).
- We have:
-
- bb_before_loop:
- if (guard-cond) GOTO bb_before_epilog_loop
- else GOTO loop
-
- loop:
- do {
- } while ...
-
- bb_before_epilog_loop:
-
- bb_before_epilog_loop has edges coming in form the loop exit and
- from bb_before_loop. New definitions for ivs will be placed on the edge
- from loop->exit to bb_before_epilog_loop. This also requires that we update
- the phis in bb_before_epilog_loop. (In the code this bb is denoted
- "update_bb").
+ The new definitions of the ivs are placed in LOOP->exit.
+ The phi args associated with the edge UPDATE_E in the bb
+ UPDATE_E->dest are updated accordingly.
Assumption 1: Like the rest of the vectorizer, this function assumes
a single loop exit that has a single predecessor.
@@ -2864,23 +2944,26 @@ vect_transform_loop_bound (loop_vec_info loop_vinfo, tree niters)
Assumption 3: The access function of the ivs is simple enough (see
vect_can_advance_ivs_p). This assumption will be relaxed in the future.
+
+ Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
+ coming out of LOOP on which the ivs of LOOP are used (this is the path
+ that leads to the epilog loop; other paths skip the epilog loop). This
+ path starts with the edge UPDATE_E, and its destination (denoted update_bb)
+ needs to have its phis updated.
*/
static void
-vect_update_ivs_after_vectorizer (struct loop *loop, tree niters)
+vect_update_ivs_after_vectorizer (struct loop *loop, tree niters, edge update_e)
{
- edge exit = loop->exit_edges[0];
+ basic_block exit_bb = loop->exit_edges[0]->dest;
tree phi, phi1;
- basic_block update_bb = exit->dest;
- edge update_e;
+ basic_block update_bb = update_e->dest;
- /* Generate basic block at the exit from the loop. */
- basic_block new_bb = split_edge (exit);
+ /* gcc_assert (vect_can_advance_ivs_p (loop)); */
+
+ /* Make sure there exists a single-predecessor exit bb: */
+ gcc_assert (EDGE_COUNT (exit_bb->preds) == 1);
- add_bb_to_loop (new_bb, EDGE_SUCC (new_bb, 0)->dest->loop_father);
- loop->exit_edges[0] = EDGE_PRED (new_bb, 0);
- update_e = EDGE_SUCC (new_bb, 0);
-
for (phi = phi_nodes (loop->header), phi1 = phi_nodes (update_bb);
phi && phi1;
phi = PHI_CHAIN (phi), phi1 = PHI_CHAIN (phi1))
@@ -2892,9 +2975,7 @@ vect_update_ivs_after_vectorizer (struct loop *loop, tree niters)
tree var, stmt, ni, ni_name;
block_stmt_iterator last_bsi;
- /* Skip virtual phi's. The data dependences that are associated with
- virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
-
+ /* Skip virtual phi's. */
if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
{
if (vect_debug_details (NULL))
@@ -2906,10 +2987,10 @@ vect_update_ivs_after_vectorizer (struct loop *loop, tree niters)
gcc_assert (access_fn);
evolution_part =
unshare_expr (evolution_part_in_loop_num (access_fn, loop->num));
+ gcc_assert (evolution_part != NULL_TREE);
- /* FORNOW: We do not transform initial conditions of IVs
- which evolution functions are a polynomial of degree >= 2 or
- exponential. */
+ /* FORNOW: We do not support IVs whose evolution function is a polynomial
+ of degree >= 2 or exponential. */
gcc_assert (!tree_is_chrec (evolution_part));
step_expr = evolution_part;
@@ -2924,12 +3005,12 @@ vect_update_ivs_after_vectorizer (struct loop *loop, tree niters)
ni_name = force_gimple_operand (ni, &stmt, false, var);
- /* Insert stmt into new_bb. */
- last_bsi = bsi_last (new_bb);
+ /* Insert stmt into exit_bb. */
+ last_bsi = bsi_last (exit_bb);
if (stmt)
- bsi_insert_after (&last_bsi, stmt, BSI_NEW_STMT);
+ bsi_insert_before (&last_bsi, stmt, BSI_SAME_STMT);
- /* Fix phi expressions in duplicated loop. */
+ /* Fix phi expressions in the successor bb. */
gcc_assert (PHI_ARG_DEF_FROM_EDGE (phi1, update_e) ==
PHI_ARG_DEF_FROM_EDGE (phi, EDGE_SUCC (loop->latch, 0)));
SET_PHI_ARG_DEF (phi1, phi_arg_from_edge (phi1, update_e), ni_name);
@@ -2937,21 +3018,28 @@ vect_update_ivs_after_vectorizer (struct loop *loop, tree niters)
}
-/* This function is the main driver of transformation
- to be done for loop before vectorizing it in case of
- unknown loop bound. */
+/* Function vect_do_peeling_for_loop_bound
+
+ Peel the last iterations of the loop represented by LOOP_VINFO.
+ The peeled iterations form a new epilog loop. Given that the loop now
+ iterates NITERS times, the new epilog loop iterates
+ NITERS % VECTORIZATION_FACTOR times.
+
+ The original loop will later be made to iterate
+ NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO). */
static void
-vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree * ratio,
+vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio,
struct loops *loops)
{
tree ni_name, ratio_mult_vf_name;
+ struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
+ struct loop *new_loop;
+ edge update_e;
#ifdef ENABLE_CHECKING
int loop_num;
#endif
- struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
- struct loop *new_loop;
if (vect_debug_details (NULL))
fprintf (dump_file, "\n<<vect_transtorm_for_unknown_loop_bound>>\n");
@@ -2972,23 +3060,32 @@ vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree * ratio,
loop_num = loop->num;
#endif
new_loop = slpeel_tree_peel_loop_to_edge (loop, loops, loop->exit_edges[0],
- ratio_mult_vf_name, ni_name, true);
+ ratio_mult_vf_name, ni_name, false);
#ifdef ENABLE_CHECKING
gcc_assert (new_loop);
gcc_assert (loop_num == loop->num);
+ slpeel_verify_cfg_after_peeling (loop, new_loop);
#endif
+ /* A guard that controls whether the new_loop is to be executed or skipped
+ is placed in LOOP->exit. LOOP->exit therefore has two successors - one
+ is the preheader of NEW_LOOP, where the IVs from LOOP are used. The other
+ is a bb after NEW_LOOP, where these IVs are not used. Find the edge that
+ is on the path where the LOOP IVs are used and need to be updated. */
+
+ if (EDGE_PRED (new_loop->pre_header, 0)->src == loop->exit_edges[0]->dest)
+ update_e = EDGE_PRED (new_loop->pre_header, 0);
+ else
+ update_e = EDGE_PRED (new_loop->pre_header, 1);
+
/* Update IVs of original loop as if they were advanced
by ratio_mult_vf_name steps. */
+ vect_update_ivs_after_vectorizer (loop, ratio_mult_vf_name, update_e);
-#ifdef ENABLE_CHECKING
- /* Check existence of intermediate bb. */
- gcc_assert (loop->exit_edges[0]->dest == new_loop->pre_header);
-#endif
- vect_update_ivs_after_vectorizer (loop, ratio_mult_vf_name);
+ /* After peeling we have to reset scalar evolution analyzer. */
+ scev_reset ();
return;
-
}
@@ -3133,6 +3230,7 @@ vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, struct loops *loops)
struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
tree niters_of_prolog_loop, ni_name;
tree n_iters;
+ struct loop *new_loop;
if (vect_debug_details (NULL))
fprintf (dump_file, "\n<<vect_do_peeling_for_alignment>>\n");
@@ -3140,17 +3238,21 @@ vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, struct loops *loops)
ni_name = vect_build_loop_niters (loop_vinfo);
niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name);
-
/* Peel the prolog loop and iterate it niters_of_prolog_loop. */
- slpeel_tree_peel_loop_to_edge (loop, loops, loop_preheader_edge(loop),
- niters_of_prolog_loop, ni_name, false);
+ new_loop =
+ slpeel_tree_peel_loop_to_edge (loop, loops, loop_preheader_edge (loop),
+ niters_of_prolog_loop, ni_name, true);
+#ifdef ENABLE_CHECKING
+ gcc_assert (new_loop);
+ slpeel_verify_cfg_after_peeling (new_loop, loop);
+#endif
/* Update number of times loop executes. */
n_iters = LOOP_VINFO_NITERS (loop_vinfo);
LOOP_VINFO_NITERS (loop_vinfo) =
build (MINUS_EXPR, integer_type_node, n_iters, niters_of_prolog_loop);
- /* Update all inits of access functions of all data refs. */
+ /* Update the init conditions of the access functions of all data refs. */
vect_update_inits_of_drs (loop_vinfo, niters_of_prolog_loop);
/* After peeling we have to reset scalar evolution analyzer. */