summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authormircea <mircea@138bc75d-0d04-0410-961f-82ee72b054a4>2014-11-15 15:37:49 +0000
committermircea <mircea@138bc75d-0d04-0410-961f-82ee72b054a4>2014-11-15 15:37:49 +0000
commit120092dd066ddf20116da297fe1aa1b79353493f (patch)
tree5205bdb5761945fee1b6c762aab0ed2eca3079be
parent88421f3a44a45f3ed241aae8955ee5f2ecc652ac (diff)
downloadgcc-120092dd066ddf20116da297fe1aa1b79353493f.tar.gz
New unroll and jam option in Graphite.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@217604 138bc75d-0d04-0410-961f-82ee72b054a4
-rw-r--r--gcc/ChangeLog25
-rw-r--r--gcc/common.opt4
-rw-r--r--gcc/graphite-isl-ast-to-gimple.c102
-rw-r--r--gcc/graphite-optimize-isl.c172
-rw-r--r--gcc/graphite-poly.c3
-rw-r--r--gcc/graphite-poly.h3
-rw-r--r--gcc/graphite.c3
-rw-r--r--gcc/params.def15
-rw-r--r--gcc/toplev.c5
9 files changed, 308 insertions, 24 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index bbf3a804cc5..42fe8fa0b68 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,28 @@
+2014-11-15 Mircea Namolaru <mircea.namolaru@inria.fr>
+
+ * common.opt (flag_loop_unroll_jam): New flag.
+ * params.def (PARAM_LOOP_UNROLL_JAM_SIZE): Parameter for unroll and
+ jam flag.
+ (PARAM_LOOP_UNROLL_JAM_DEPTH): Likewise.
+ * graphite-poly.h (struct poly_bb:map_sepclass): New field.
+ * graphite-poly.c (new_poly_bb): Initialization for new field.
+ (apply_poly_transforms): Support for unroll and jam flag.
+ * graphite-isl-ast-to-gimple.c (generate_luj_sepclass): Compute the
+ separation class.
+ (generate_luj_sepclass_opt): Build the separation class option.
+ (generate_luj_options): Set unroll and jam options.
+ (set_options): Support for unroll and jam options.
+ (scop_to_isl_ast): Likewise.
+ * graphite-optimize-isl.c (getPrevectorMap_full): New function for
+ computing the separating class map.
+ (optimize_isl): Support for the separating class map.
+ (apply_schedule_map_to_scop): Likewise.
+ (getScheduleMap): Likewise.
+ (getScheduleForBand): Likewise.
+ (getScheduleForBandList): Likewise.
+ * graphite.c (gate_graphite_transforms): Add unroll and jam flag.
+ * toplev.c (process_options): Likewise.
+
2014-11-15 Eric Botcazou <ebotcazou@adacore.com>
* tree-cfg.c (replace_loop_annotate_in_block): New function extracted
diff --git a/gcc/common.opt b/gcc/common.opt
index 06daa43d231..3a6d7e10c2c 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -1315,6 +1315,10 @@ floop-block
Common Report Var(flag_loop_block) Optimization
Enable Loop Blocking transformation
+floop-unroll-and-jam
+Common Report Var(flag_loop_unroll_jam) Optimization
+Enable Loop Unroll Jam transformation
+
fgnu-tm
Common Report Var(flag_tm)
Enable support for GNU transactional memory
diff --git a/gcc/graphite-isl-ast-to-gimple.c b/gcc/graphite-isl-ast-to-gimple.c
index 46ccbf4fd05..bbf30550e94 100644
--- a/gcc/graphite-isl-ast-to-gimple.c
+++ b/gcc/graphite-isl-ast-to-gimple.c
@@ -831,6 +831,92 @@ extend_schedule (__isl_take isl_map *schedule, int nb_schedule_dims)
return schedule;
}
+/* Build the "separation_class" AST build option for loop unroll and jam.
+
+   SCOP supplies the isl context, the parameter space (taken from
+   scop->context) and the number of schedule dimensions.  DOMAIN is the set
+   the option is restricted to; it is consumed.  DIM and CL are the values
+   the two coordinates of the separation_class tuple are fixed to
+   (presumably the schedule dimension and the class number, as in the isl
+   "separation_class[[dim] -> [class]]" option).
+
+   Returns a union map from the schedule space to the separation_class
+   tuple, restricted to DOMAIN.  The caller owns the result.  */
+
+static __isl_give isl_union_map *
+generate_luj_sepclass_opt (scop_p scop, __isl_take isl_union_set *domain,
+                           int dim, int cl)
+{
+  isl_map *map;
+  isl_space *space, *space_sep;
+  isl_ctx *ctx;
+  isl_union_map *mapu;
+  int nsched = get_max_schedule_dimensions (scop);
+
+  ctx = scop->ctx;
+
+  /* Range: a wrapped 1 -> 1 space named "separation_class".  */
+  space_sep = isl_space_alloc (ctx, 0, 1, 1);
+  space_sep = isl_space_wrap (space_sep);
+  space_sep = isl_space_set_tuple_name (space_sep, isl_dim_set,
+                                        "separation_class");
+
+  /* Domain: the scop context space extended with NSCHED input dimensions;
+     the range is aligned to the same parameters first.  */
+  space = isl_set_get_space (scop->context);
+  space_sep = isl_space_align_params (space_sep, isl_space_copy (space));
+  space = isl_space_map_from_domain_and_range (space, space_sep);
+  space = isl_space_add_dims (space, isl_dim_in, nsched);
+  map = isl_map_universe (space);
+
+  /* isl_map_fix_si takes ownership of its argument and returns the
+     constrained map, so the result has to be assigned back.  */
+  map = isl_map_fix_si (map, isl_dim_out, 0, dim);
+  map = isl_map_fix_si (map, isl_dim_out, 1, cl);
+
+  mapu = isl_union_map_intersect_domain (isl_union_map_from_map (map),
+                                         domain);
+  return mapu;
+}
+
+/* Compute the separation class for loop unroll and jam.
+
+   For every statement of SCOP that carries a separation class map
+   (pbb->map_sepclass) and has a non-empty domain, apply that map to the
+   statement domain and collect the images in one union set.
+
+   Every map_sepclass that is found is consumed and the field is reset to
+   NULL, so the maps are used at most once.  The caller owns the returned
+   union set.  */
+
+static __isl_give isl_union_set *
+generate_luj_sepclass (scop_p scop)
+{
+  int i;
+  poly_bb_p pbb;
+  isl_union_set *domain_isl;
+
+  domain_isl = isl_union_set_empty (isl_set_get_space (scop->context));
+
+  FOR_EACH_VEC_ELT (SCOP_BBS (scop), i, pbb)
+    {
+      isl_set *bb_domain;
+      isl_set *bb_domain_s;
+
+      if (pbb->map_sepclass == NULL)
+        continue;
+
+      if (isl_set_is_empty (pbb->domain))
+        {
+          /* Nothing to contribute, but the map still has to be released:
+             skipping it here would otherwise leave it allocated.  */
+          isl_map_free (pbb->map_sepclass);
+          pbb->map_sepclass = NULL;
+          continue;
+        }
+
+      bb_domain = isl_set_copy (pbb->domain);
+
+      /* isl_set_apply consumes both the set copy and the map.  */
+      bb_domain_s = isl_set_apply (bb_domain, pbb->map_sepclass);
+      pbb->map_sepclass = NULL;
+
+      domain_isl =
+        isl_union_set_union (domain_isl, isl_union_set_from_set (bb_domain_s));
+    }
+
+  return domain_isl;
+}
+
+/* Build the AST build options used for loop unroll and jam on SCOP.
+
+   The result is an empty union map (in the space of scop->context) unless
+   flag_loop_unroll_jam is set; in that case it holds the separation_class
+   option built from the separation class of the scop, for class 0.  */
+
+static __isl_give isl_union_map *
+generate_luj_options (scop_p scop)
+{
+  isl_union_map *luj_opts
+    = isl_union_map_empty (isl_set_get_space (scop->context));
+
+  if (flag_loop_unroll_jam)
+    {
+      /* The dimension handed to the option lies
+         PARAM_LOOP_UNROLL_JAM_DEPTH levels above the last schedule
+         dimension.  */
+      int last_dim = get_max_schedule_dimensions (scop) - 1;
+      int sep_dim = last_dim - PARAM_VALUE (PARAM_LOOP_UNROLL_JAM_DEPTH);
+
+      isl_union_set *sep_domain = generate_luj_sepclass (scop);
+      isl_union_map *sep_opt
+        = generate_luj_sepclass_opt (scop, sep_domain, sep_dim, 0);
+
+      luj_opts = isl_union_map_union (luj_opts, sep_opt);
+    }
+
+  return luj_opts;
+}
+
/* Generates a schedule, which specifies an order used to
visit elements in a domain. */
@@ -879,11 +965,13 @@ ast_build_before_for (__isl_keep isl_ast_build *build, void *user)
}
/* Set the separate option for all dimensions.
- This helps to reduce control overhead. */
+ This helps to reduce control overhead.
+ Set the options for unroll and jam. */
static __isl_give isl_ast_build *
set_options (__isl_take isl_ast_build *control,
- __isl_keep isl_union_map *schedule)
+ __isl_keep isl_union_map *schedule,
+ __isl_take isl_union_map *opt_luj)
{
isl_ctx *ctx = isl_union_map_get_ctx (schedule);
isl_space *range_space = isl_space_set_alloc (ctx, 0, 1);
@@ -894,6 +982,9 @@ set_options (__isl_take isl_ast_build *control,
isl_union_set *domain = isl_union_map_range (isl_union_map_copy (schedule));
domain = isl_union_set_universe (domain);
isl_union_map *options = isl_union_map_from_domain_and_range (domain, range);
+
+ options = isl_union_map_union (options, opt_luj);
+
return isl_ast_build_set_options (control, options);
}
@@ -907,9 +998,14 @@ scop_to_isl_ast (scop_p scop, ivs_params &ip)
isl_options_set_ast_build_atomic_upper_bound (scop->ctx, true);
add_parameters_to_ivs_params (scop, ip);
+
+ isl_union_map *options_luj = generate_luj_options (scop);
+
isl_union_map *schedule_isl = generate_isl_schedule (scop);
isl_ast_build *context_isl = generate_isl_context (scop);
- context_isl = set_options (context_isl, schedule_isl);
+
+ context_isl = set_options (context_isl, schedule_isl, options_luj);
+
isl_union_map *dependences = NULL;
if (flag_loop_parallelize_all)
{
diff --git a/gcc/graphite-optimize-isl.c b/gcc/graphite-optimize-isl.c
index c1d04afd3f4..cbab8202244 100644
--- a/gcc/graphite-optimize-isl.c
+++ b/gcc/graphite-optimize-isl.c
@@ -186,7 +186,7 @@ getScheduleForBand (isl_band *Band, int *Dimensions)
PartialSchedule = isl_band_get_partial_schedule (Band);
*Dimensions = isl_band_n_member (Band);
- if (DisableTiling)
+ if (DisableTiling || flag_loop_unroll_jam)
return PartialSchedule;
/* It does not make any sense to tile a band with just one dimension. */
@@ -241,7 +241,9 @@ getScheduleForBand (isl_band *Band, int *Dimensions)
constant number of iterations, if the number of loop iterations at
DimToVectorize can be devided by VectorWidth. The default VectorWidth is
currently constant and not yet target specific. This function does not reason
- about parallelism. */
+ about parallelism.
+
+ */
static isl_map *
getPrevectorMap (isl_ctx *ctx, int DimToVectorize,
int ScheduleDimensions,
@@ -305,7 +307,97 @@ getPrevectorMap (isl_ctx *ctx, int DimToVectorize,
isl_constraint_set_constant_si (c, VectorWidth - 1);
TilingMap = isl_map_add_constraint (TilingMap, c);
- isl_map_dump (TilingMap);
+ return TilingMap;
+}
+
+/* Compute an auxiliary map to getPrevectorMap, for computing the separating
+   class defined by full tiles.  Used in graphite-isl-ast-to-gimple.c to set
+   the corresponding option for AST build.
+
+   The map has ScheduleDimensions input dimensions and
+   ScheduleDimensions + 1 output dimensions.  With i the input dimension
+   DimToVectorize, it the output dimension at the same position and ip the
+   extra last output dimension, the map is (for VectorWidth = 4):
+
+   [i,j] -> [it,j,ip] : it % 4 = 0 and it <= ip <= it + 3 and it + 3 = i and
+   ip >= 0
+
+   All other dimensions are mapped to themselves.  Because of it + 3 = i,
+   only the last iteration of a tile whose VectorWidth iterations are all
+   present gets an image; that image is the separation class.
+
+   Precondition (not checked): 0 <= DimToVectorize < ScheduleDimensions.
+   The caller owns the returned map.  */
+static isl_map *
+getPrevectorMap_full (isl_ctx *ctx, int DimToVectorize,
+                      int ScheduleDimensions,
+                      int VectorWidth)
+{
+  isl_space *Space;
+  isl_local_space *LocalSpace, *LocalSpaceRange;
+  isl_set *Modulo;
+  isl_map *TilingMap;
+  isl_constraint *c;
+  isl_aff *Aff;
+  int PointDimension; /* ip */
+  int TileDimension; /* it */
+  isl_val *VectorWidthMP;
+  int i;
+
+  Space = isl_space_alloc (ctx, 0, ScheduleDimensions, ScheduleDimensions + 1);
+  TilingMap = isl_map_universe (isl_space_copy (Space));
+  LocalSpace = isl_local_space_from_space (Space);
+  PointDimension = ScheduleDimensions;
+  TileDimension = DimToVectorize;
+
+  /* Create an identity map for everything except DimToVectorize and the
+     point loop.  The isl_constraint setters take ownership of the
+     constraint and return the updated one, hence the assignments.  */
+  for (i = 0; i < ScheduleDimensions; i++)
+    {
+      if (i == DimToVectorize)
+        continue;
+
+      c = isl_equality_alloc (isl_local_space_copy (LocalSpace));
+
+      c = isl_constraint_set_coefficient_si (c, isl_dim_in, i, -1);
+      c = isl_constraint_set_coefficient_si (c, isl_dim_out, i, 1);
+
+      TilingMap = isl_map_add_constraint (TilingMap, c);
+    }
+
+  /* it % 'VectorWidth' = 0 */
+  LocalSpaceRange = isl_local_space_range (isl_local_space_copy (LocalSpace));
+  Aff = isl_aff_zero_on_domain (LocalSpaceRange);
+  Aff = isl_aff_set_constant_si (Aff, VectorWidth);
+  Aff = isl_aff_set_coefficient_si (Aff, isl_dim_in, TileDimension, 1);
+
+  VectorWidthMP = isl_val_int_from_si (ctx, VectorWidth);
+  Aff = isl_aff_mod_val (Aff, VectorWidthMP);
+  Modulo = isl_pw_aff_zero_set (isl_pw_aff_from_aff (Aff));
+  TilingMap = isl_map_intersect_range (TilingMap, Modulo);
+
+  /* it + ('VectorWidth' - 1) = i0 */
+  c = isl_equality_alloc (isl_local_space_copy (LocalSpace));
+  c = isl_constraint_set_coefficient_si (c, isl_dim_out, TileDimension, -1);
+  c = isl_constraint_set_coefficient_si (c, isl_dim_in, TileDimension, 1);
+  c = isl_constraint_set_constant_si (c, -VectorWidth + 1);
+  TilingMap = isl_map_add_constraint (TilingMap, c);
+
+  /* ip >= 0 */
+  c = isl_inequality_alloc (isl_local_space_copy (LocalSpace));
+  c = isl_constraint_set_coefficient_si (c, isl_dim_out, PointDimension, 1);
+  c = isl_constraint_set_constant_si (c, 0);
+  TilingMap = isl_map_add_constraint (TilingMap, c);
+
+  /* it <= ip */
+  c = isl_inequality_alloc (isl_local_space_copy (LocalSpace));
+  c = isl_constraint_set_coefficient_si (c, isl_dim_out, TileDimension, -1);
+  c = isl_constraint_set_coefficient_si (c, isl_dim_out, PointDimension, 1);
+  TilingMap = isl_map_add_constraint (TilingMap, c);
+
+  /* ip <= it + ('VectorWidth' - 1).  This is the last use of LocalSpace,
+     so it is handed over instead of copied.  */
+  c = isl_inequality_alloc (LocalSpace);
+  c = isl_constraint_set_coefficient_si (c, isl_dim_out, TileDimension, 1);
+  c = isl_constraint_set_coefficient_si (c, isl_dim_out, PointDimension, -1);
+  c = isl_constraint_set_constant_si (c, VectorWidth - 1);
+  TilingMap = isl_map_add_constraint (TilingMap, c);
return TilingMap;
}
@@ -316,9 +408,11 @@ static bool EnablePollyVector = false;
We walk recursively the forest of bands to combine the schedules of the
individual bands to the overall schedule. In case tiling is requested,
- the individual bands are tiled. */
+ the individual bands are tiled.
+ For unroll and jam, the schedule for the full tiles of the unrolled
+ dimension is also computed and accumulated in *MAP_SEPCL. */
static isl_union_map *
-getScheduleForBandList (isl_band_list *BandList)
+getScheduleForBandList (isl_band_list *BandList, isl_union_map **map_sepcl)
{
int NumBands, i;
isl_union_map *Schedule;
@@ -335,55 +429,87 @@ getScheduleForBandList (isl_band_list *BandList)
int ScheduleDimensions;
isl_space *Space;
+ isl_union_map *PartialSchedule_f;
+
Band = isl_band_list_get_band (BandList, i);
PartialSchedule = getScheduleForBand (Band, &ScheduleDimensions);
Space = isl_union_map_get_space (PartialSchedule);
+ PartialSchedule_f = NULL;
+
if (isl_band_has_children (Band))
{
isl_band_list *Children;
isl_union_map *SuffixSchedule;
Children = isl_band_get_children (Band);
- SuffixSchedule = getScheduleForBandList (Children);
+ SuffixSchedule = getScheduleForBandList (Children, map_sepcl);
PartialSchedule = isl_union_map_flat_range_product (PartialSchedule,
SuffixSchedule);
isl_band_list_free (Children);
}
- else if (EnablePollyVector)
+ else if (EnablePollyVector || flag_loop_unroll_jam)
{
+ int i;
+ int depth;
+
+ depth = PARAM_VALUE (PARAM_LOOP_UNROLL_JAM_DEPTH);
+
for (i = ScheduleDimensions - 1 ; i >= 0 ; i--)
{
+ if (flag_loop_unroll_jam && (i != (ScheduleDimensions - depth)))
+ continue;
+
if (isl_band_member_is_zero_distance (Band, i))
{
isl_map *TileMap;
isl_union_map *TileUMap;
+ int stride;
+
+ stride = PARAM_VALUE (PARAM_LOOP_UNROLL_JAM_SIZE);
+
+ TileMap = getPrevectorMap_full (ctx, i, ScheduleDimensions,
+ stride);
+ TileUMap = isl_union_map_from_map (TileMap);
+ TileUMap = isl_union_map_align_params
+ (TileUMap, isl_space_copy (Space));
+ PartialSchedule_f = isl_union_map_apply_range
+ (isl_union_map_copy (PartialSchedule), TileUMap);
- TileMap = getPrevectorMap (ctx, i, ScheduleDimensions, 4);
+ TileMap = getPrevectorMap (ctx, i, ScheduleDimensions, stride);
TileUMap = isl_union_map_from_map (TileMap);
TileUMap = isl_union_map_align_params
(TileUMap, isl_space_copy (Space));
PartialSchedule = isl_union_map_apply_range
(PartialSchedule, TileUMap);
break;
- }
+ }
}
}
-
+      /* Collect the separation class schedule first: the union into
+         Schedule below consumes PartialSchedule, so it must not be copied
+         after that point.  */
+      if (flag_loop_unroll_jam)
+        {
+          if (PartialSchedule_f)
+            *map_sepcl = isl_union_map_union (*map_sepcl,
+                                              PartialSchedule_f);
+          else
+            *map_sepcl = isl_union_map_union (*map_sepcl,
+                                              isl_union_map_copy (PartialSchedule));
+        }
+      else
+        /* Without unroll and jam the full-tile schedule is not used;
+           release it (a NULL argument is accepted).  */
+        isl_union_map_free (PartialSchedule_f);
+
Schedule = isl_union_map_union (Schedule, PartialSchedule);
isl_band_free (Band);
isl_space_free (Space);
}
return Schedule;
}
static isl_union_map *
-getScheduleMap (isl_schedule *Schedule)
+getScheduleMap (isl_schedule *Schedule, isl_union_map **map_sepcl)
{
isl_band_list *BandList = isl_schedule_get_band_forest (Schedule);
- isl_union_map *ScheduleMap = getScheduleForBandList (BandList);
+ isl_union_map *ScheduleMap = getScheduleForBandList (BandList, map_sepcl);
isl_band_list_free (BandList);
return ScheduleMap;
}
@@ -398,7 +524,7 @@ getSingleMap (__isl_take isl_map *map, void *user)
}
static void
-apply_schedule_map_to_scop (scop_p scop, isl_union_map *schedule_map)
+apply_schedule_map_to_scop (scop_p scop, isl_union_map *schedule_map, bool sepcl)
{
int i;
poly_bb_p pbb;
@@ -413,8 +539,15 @@ apply_schedule_map_to_scop (scop_p scop, isl_union_map *schedule_map)
(isl_union_map_copy (schedule_map),
isl_union_set_from_set (domain));
isl_union_map_foreach_map (stmtBand, getSingleMap, &stmtSchedule);
- isl_map_free (pbb->transformed);
- pbb->transformed = stmtSchedule;
+
+ if (!sepcl)
+ {
+ isl_map_free (pbb->transformed);
+ pbb->transformed = stmtSchedule;
+ }
+ else
+ pbb->map_sepclass = stmtSchedule;
+
isl_union_map_free (stmtBand);
}
}
@@ -429,6 +562,7 @@ optimize_isl (scop_p scop)
isl_union_set *domain;
isl_union_map *validity, *proximity, *dependences;
isl_union_map *schedule_map;
+ isl_union_map *schedule_map_f;
domain = scop_get_domains (scop);
dependences = scop_get_dependences (scop);
@@ -450,9 +584,13 @@ optimize_isl (scop_p scop)
if (!schedule)
return false;
- schedule_map = getScheduleMap (schedule);
+ schedule_map_f = isl_union_map_empty (isl_space_params_alloc (scop->ctx, 0));
+ schedule_map = getScheduleMap (schedule, &schedule_map_f);
- apply_schedule_map_to_scop (scop, schedule_map);
+ apply_schedule_map_to_scop (scop, schedule_map, false);
+ if (!isl_union_map_is_empty (schedule_map_f))
+ apply_schedule_map_to_scop (scop, schedule_map_f, true);
+ isl_union_map_free (schedule_map_f);
isl_schedule_free (schedule);
isl_union_map_free (schedule_map);
diff --git a/gcc/graphite-poly.c b/gcc/graphite-poly.c
index 8ab0cb57c03..77ccc3d605f 100644
--- a/gcc/graphite-poly.c
+++ b/gcc/graphite-poly.c
@@ -272,7 +272,7 @@ apply_poly_transforms (scop_p scop)
/* This pass needs to be run at the final stage, as it does not
update the lst. */
- if (flag_loop_optimize_isl)
+ if (flag_loop_optimize_isl || flag_loop_unroll_jam)
transform_done |= optimize_isl (scop);
return transform_done;
@@ -323,6 +323,7 @@ new_poly_bb (scop_p scop, void *black_box)
pbb->schedule = NULL;
pbb->transformed = NULL;
pbb->saved = NULL;
+ pbb->map_sepclass = NULL;
PBB_SCOP (pbb) = scop;
pbb_set_black_box (pbb, black_box);
PBB_TRANSFORMED (pbb) = NULL;
diff --git a/gcc/graphite-poly.h b/gcc/graphite-poly.h
index 9c7639a901d..c8d9fa1fbb3 100644
--- a/gcc/graphite-poly.h
+++ b/gcc/graphite-poly.h
@@ -349,6 +349,9 @@ struct poly_bb
poly_scattering_p _saved;
isl_map *saved;
+ /* For tiling, the map for computing the separating class. */
+ isl_map *map_sepclass;
+
/* True when this PBB contains only a reduction statement. */
bool is_reduction;
};
diff --git a/gcc/graphite.c b/gcc/graphite.c
index a8c5a5bec11..59d5a136cfc 100644
--- a/gcc/graphite.c
+++ b/gcc/graphite.c
@@ -343,7 +343,8 @@ gate_graphite_transforms (void)
|| flag_loop_strip_mine
|| flag_graphite_identity
|| flag_loop_parallelize_all
- || flag_loop_optimize_isl)
+ || flag_loop_optimize_isl
+ || flag_loop_unroll_jam)
flag_graphite = 1;
return flag_graphite != 0;
diff --git a/gcc/params.def b/gcc/params.def
index d2d2add4e24..6c713268b97 100644
--- a/gcc/params.def
+++ b/gcc/params.def
@@ -847,6 +847,21 @@ DEFPARAM (PARAM_LOOP_BLOCK_TILE_SIZE,
"size of tiles for loop blocking",
51, 0, 0)
+/* Size of unrolling factor for unroll-and-jam. */
+
+DEFPARAM (PARAM_LOOP_UNROLL_JAM_SIZE,
+ "loop-unroll-jam-size",
+ "size of unrolling factor for unroll-and-jam",
+ 4, 0, 0)
+
+/* Size of the band formed by the strip mined dimension and the innermost one for unroll-and-jam. */
+
+DEFPARAM (PARAM_LOOP_UNROLL_JAM_DEPTH,
+ "loop-unroll-jam-depth",
+ "depth of unrolled loop for unroll-and-jam",
+ 2, 0, 0)
+
+
/* Maximal number of parameters that we allow in a SCoP. */
DEFPARAM (PARAM_GRAPHITE_MAX_NB_SCOP_PARAMS,
diff --git a/gcc/toplev.c b/gcc/toplev.c
index 2ed76bc610d..2e480471d6b 100644
--- a/gcc/toplev.c
+++ b/gcc/toplev.c
@@ -1346,11 +1346,12 @@ process_options (void)
|| flag_loop_block
|| flag_loop_interchange
|| flag_loop_strip_mine
- || flag_loop_parallelize_all)
+ || flag_loop_parallelize_all
+ || flag_loop_unroll_jam)
sorry ("Graphite loop optimizations cannot be used (ISL is not available)"
"(-fgraphite, -fgraphite-identity, -floop-block, "
"-floop-interchange, -floop-strip-mine, -floop-parallelize-all, "
- "and -ftree-loop-linear)");
+ "-floop-unroll-and-jam, and -ftree-loop-linear)");
#endif
if (flag_check_pointer_bounds)