summaryrefslogtreecommitdiff
path: root/gcc/tree-loop-distribution.c
diff options
context:
space:
mode:
Diffstat (limited to 'gcc/tree-loop-distribution.c')
-rw-r--r--gcc/tree-loop-distribution.c155
1 files changed, 143 insertions, 12 deletions
diff --git a/gcc/tree-loop-distribution.c b/gcc/tree-loop-distribution.c
index 30091453e39..fbb891fdedf 100644
--- a/gcc/tree-loop-distribution.c
+++ b/gcc/tree-loop-distribution.c
@@ -90,6 +90,7 @@ along with GCC; see the file COPYING3. If not see
data reuse. */
#include "config.h"
+#define INCLUDE_ALGORITHM /* stable_sort */
#include "system.h"
#include "coretypes.h"
#include "backend.h"
@@ -106,6 +107,7 @@ along with GCC; see the file COPYING3. If not see
#include "stor-layout.h"
#include "tree-cfg.h"
#include "tree-ssa-loop-manip.h"
+#include "tree-ssa-loop-ivopts.h"
#include "tree-ssa-loop.h"
#include "tree-into-ssa.h"
#include "tree-ssa.h"
@@ -604,6 +606,10 @@ struct builtin_info
tree dst_base;
tree src_base;
tree size;
+ /* Base and offset part of dst_base after stripping constant offset. This
+ is only used in memset builtin distribution for now. */
+ tree dst_base_base;
+ unsigned HOST_WIDE_INT dst_base_offset;
};
/* Partition for loop distribution. */
@@ -1286,12 +1292,12 @@ build_rdg_partition_for_vertex (struct graph *rdg, int v)
return partition;
}
-/* Given PARTITION of RDG, record single load/store data references for
- builtin partition in SRC_DR/DST_DR, return false if there is no such
+/* Given PARTITION of LOOP and RDG, record single load/store data references
+ for builtin partition in SRC_DR/DST_DR, return false if there is no such
data references. */
static bool
-find_single_drs (struct graph *rdg, partition *partition,
+find_single_drs (struct loop *loop, struct graph *rdg, partition *partition,
data_reference_p *dst_dr, data_reference_p *src_dr)
{
unsigned i;
@@ -1347,10 +1353,12 @@ find_single_drs (struct graph *rdg, partition *partition,
&& DECL_BIT_FIELD (TREE_OPERAND (DR_REF (single_st), 1)))
return false;
- /* Data reference must be executed exactly once per iteration. */
+ /* Data reference must be executed exactly once per iteration of each
+ loop in the loop nest. We only need to check dominance information
+ against the outermost one in a perfect loop nest because a bb can't
+ dominate outermost loop's latch without dominating inner loop's. */
basic_block bb_st = gimple_bb (DR_STMT (single_st));
- struct loop *inner = bb_st->loop_father;
- if (!dominated_by_p (CDI_DOMINATORS, inner->latch, bb_st))
+ if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb_st))
return false;
if (single_ld)
@@ -1368,14 +1376,16 @@ find_single_drs (struct graph *rdg, partition *partition,
/* Load and store must be in the same loop nest. */
basic_block bb_ld = gimple_bb (DR_STMT (single_ld));
- if (inner != bb_ld->loop_father)
+ if (bb_st->loop_father != bb_ld->loop_father)
return false;
- /* Data reference must be executed exactly once per iteration. */
- if (!dominated_by_p (CDI_DOMINATORS, inner->latch, bb_ld))
+ /* Data reference must be executed exactly once per iteration.
+ Same as single_st, we only need to check against the outermost
+ loop. */
+ if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb_ld))
return false;
- edge e = single_exit (inner);
+ edge e = single_exit (bb_st->loop_father);
bool dom_ld = dominated_by_p (CDI_DOMINATORS, e->src, bb_ld);
bool dom_st = dominated_by_p (CDI_DOMINATORS, e->src, bb_st);
if (dom_ld != dom_st)
@@ -1503,7 +1513,17 @@ classify_builtin_st (loop_p loop, partition *partition, data_reference_p dr)
if (!compute_access_range (loop, dr, &base, &size))
return;
- partition->builtin = alloc_builtin (dr, NULL, base, NULL_TREE, size);
+ poly_uint64 base_offset;
+ unsigned HOST_WIDE_INT const_base_offset;
+ tree base_base = strip_offset (base, &base_offset);
+ if (!base_offset.is_constant (&const_base_offset))
+ return;
+
+ struct builtin_info *builtin;
+ builtin = alloc_builtin (dr, NULL, base, NULL_TREE, size);
+ builtin->dst_base_base = base_base;
+ builtin->dst_base_offset = const_base_offset;
+ partition->builtin = builtin;
partition->kind = PKIND_MEMSET;
}
@@ -1614,7 +1634,7 @@ classify_partition (loop_p loop, struct graph *rdg, partition *partition,
return;
/* Find single load/store data references for builtin partition. */
- if (!find_single_drs (rdg, partition, &single_st, &single_ld))
+ if (!find_single_drs (loop, rdg, partition, &single_st, &single_ld))
return;
/* Classify the builtin kind. */
@@ -2482,6 +2502,113 @@ version_for_distribution_p (vec<struct partition *> *partitions,
return (alias_ddrs->length () > 0);
}
+/* Compare base offset of builtin mem* partitions P1 and P2. */
+
+static bool
+offset_cmp (struct partition *p1, struct partition *p2)
+{
+ gcc_assert (p1 != NULL && p1->builtin != NULL);
+ gcc_assert (p2 != NULL && p2->builtin != NULL);
+ return p1->builtin->dst_base_offset < p2->builtin->dst_base_offset;
+}
+
+/* Fuse adjacent memset builtin PARTITIONS if possible. This is a special
+ case optimization transforming below code:
+
+ __builtin_memset (&obj, 0, 100);
+ _1 = &obj + 100;
+ __builtin_memset (_1, 0, 200);
+ _2 = &obj + 300;
+ __builtin_memset (_2, 0, 100);
+
+ into:
+
+ __builtin_memset (&obj, 0, 400);
+
+ Note we don't have dependence information between different partitions
+ at this point, as a result, we can't handle nonadjacent memset builtin
+ partitions since dependence might be broken. */
+
+static void
+fuse_memset_builtins (vec<struct partition *> *partitions)
+{
+ unsigned i, j;
+ struct partition *part1, *part2;
+
+ for (i = 0; partitions->iterate (i, &part1);)
+ {
+ if (part1->kind != PKIND_MEMSET)
+ {
+ i++;
+ continue;
+ }
+
+ /* Find sub-array of memset builtins of the same base. Index range
+ of the sub-array is [i, j) with "j > i". */
+ for (j = i + 1; partitions->iterate (j, &part2); ++j)
+ {
+ if (part2->kind != PKIND_MEMSET
+ || !operand_equal_p (part1->builtin->dst_base_base,
+ part2->builtin->dst_base_base, 0))
+ break;
+ }
+
+ /* Stable sort is required in order to avoid breaking dependence. */
+ std::stable_sort (&(*partitions)[i],
+ &(*partitions)[i] + j - i, offset_cmp);
+ /* Continue with next partition. */
+ i = j;
+ }
+
+ /* Merge all consecutive memset builtin partitions. */
+ for (i = 0; i < partitions->length () - 1;)
+ {
+ part1 = (*partitions)[i];
+ if (part1->kind != PKIND_MEMSET)
+ {
+ i++;
+ continue;
+ }
+
+ part2 = (*partitions)[i + 1];
+ /* Only merge memset partitions of the same base and with constant
+ access sizes. */
+ if (part2->kind != PKIND_MEMSET
+ || TREE_CODE (part1->builtin->size) != INTEGER_CST
+ || TREE_CODE (part2->builtin->size) != INTEGER_CST
+ || !operand_equal_p (part1->builtin->dst_base_base,
+ part2->builtin->dst_base_base, 0))
+ {
+ i++;
+ continue;
+ }
+ tree rhs1 = gimple_assign_rhs1 (DR_STMT (part1->builtin->dst_dr));
+ tree rhs2 = gimple_assign_rhs1 (DR_STMT (part2->builtin->dst_dr));
+ int bytev1 = const_with_all_bytes_same (rhs1);
+ int bytev2 = const_with_all_bytes_same (rhs2);
+ /* Only merge memset partitions of the same value. */
+ if (bytev1 != bytev2 || bytev1 == -1)
+ {
+ i++;
+ continue;
+ }
+ wide_int end1 = wi::add (part1->builtin->dst_base_offset,
+ wi::to_wide (part1->builtin->size));
+ /* Only merge adjacent memset partitions. */
+ if (wi::ne_p (end1, part2->builtin->dst_base_offset))
+ {
+ i++;
+ continue;
+ }
+ /* Merge partitions[i] and partitions[i+1]. */
+ part1->builtin->size = fold_build2 (PLUS_EXPR, sizetype,
+ part1->builtin->size,
+ part2->builtin->size);
+ partition_free (part2);
+ partitions->ordered_remove (i + 1);
+ }
+}
+
/* Fuse PARTITIONS of LOOP if necessary before finalizing distribution.
ALIAS_DDRS contains ddrs which need runtime alias check. */
@@ -2525,6 +2652,10 @@ finalize_partitions (struct loop *loop, vec<struct partition *> *partitions,
}
partitions->truncate (1);
}
+
+ /* Fuse memset builtins if possible. */
+ if (partitions->length () > 1)
+ fuse_memset_builtins (partitions);
}
/* Distributes the code from LOOP in such a way that producer statements