1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
|
/* Loop flattening for Graphite.
Copyright (C) 2010 Free Software Foundation, Inc.
Contributed by Sebastian Pop <sebastian.pop@amd.com>.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"
#include "rtl.h"
#include "output.h"
#include "basic-block.h"
#include "diagnostic.h"
#include "tree-flow.h"
#include "toplev.h"
#include "tree-dump.h"
#include "timevar.h"
#include "cfgloop.h"
#include "tree-chrec.h"
#include "tree-data-ref.h"
#include "tree-scalar-evolution.h"
#include "tree-pass.h"
#include "domwalk.h"
#include "value-prof.h"
#include "pointer-set.h"
#include "gimple.h"
#include "params.h"
#ifdef HAVE_cloog
#include "ppl_c.h"
#include "sese.h"
#include "graphite-ppl.h"
#include "graphite.h"
#include "graphite-poly.h"
/* The loop flattening pass transforms loop nests into a single loop,
removing the loop nesting structure. The auto-vectorization can
then apply on the full loop body, without needing the outer-loop
vectorization.
The loop flattening pass that has been described in a very Fortran
specific way in the 1992 paper by Reinhard von Hanxleden and Ken
Kennedy: "Relaxing SIMD Control Flow Constraints using Loop
Transformations" available from
http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.54.5033
The canonical example is as follows: suppose that we have a loop
nest with known iteration counts
| for (i = 1; i <= 6; i++)
| for (j = 1; j <= 6; j++)
| S1(i,j);
The loop flattening is performed by linearizing the iteration space
using the function "f (x) = 6 * i + j". In this case, CLooG would
produce this code:
| for (c1=7;c1<=42;c1++) {
| i = floord(c1-1,6);
| S1(i,c1-6*i);
| }
There are several limitations for loop flattening that are linked
to the expressivity of the polyhedral model. One has to take an
upper bound approximation to deal with the parametric case of loop
flattening. For example, in the loop nest:
| for (i = 1; i <= N; i++)
| for (j = 1; j <= M; j++)
| S1(i,j);
One would like to flatten this loop using a linearization function
like this "f (x) = M * i + j". However CLooG's schedules are not
expressive enough to deal with this case, and so the parameter M
has to be replaced by an integer upper bound approximation. If we
further know in the context of the scop that "M <= 6", then it is
possible to linearize the loop with "f (x) = 6 * i + j". In this
case, CLooG would produce this code:
| for (c1=7;c1<=6*M+N;c1++) {
| i = ceild(c1-N,6);
| if (i <= floord(c1-1,6)) {
| S1(i,c1-6*i);
| }
| }
For an arbitrarily complex loop nest the algorithm proceeds in two
steps. First, the LST is flattened by removing the loops structure
and by inserting the statements in the order they appear in
depth-first order. Then, the scattering of each statement is
transformed accordingly.
Supposing that the original program is represented by the following
LST:
| (loop_1
| stmt_1
| (loop_2 stmt_3
| (loop_3 stmt_4)
| (loop_4 stmt_5 stmt_6)
| stmt_7
| )
| stmt_2
| )
Loop flattening traverses the LST in depth-first order, and
flattens pairs of loops successively by projecting the inner loops
in the iteration domain of the outer loops:
lst_project_loop (loop_2, loop_3, stride)
| (loop_1
| stmt_1
| (loop_2 stmt_3 stmt_4
| (loop_4 stmt_5 stmt_6)
| stmt_7
| )
| stmt_2
| )
lst_project_loop (loop_2, loop_4, stride)
| (loop_1
| stmt_1
| (loop_2 stmt_3 stmt_4 stmt_5 stmt_6 stmt_7)
| stmt_2
| )
lst_project_loop (loop_1, loop_2, stride)
| (loop_1
| stmt_1 stmt_3 stmt_4 stmt_5 stmt_6 stmt_7 stmt_2
| )
At each step, the iteration domain of the outer loop is enlarged to
contain enough points to iterate over the inner loop domain. */
/* Initializes RES to the number of iterations of the linearized loop
LST. RES is the cardinal of the iteration domain of LST. */
static void
lst_linearized_niter (lst_p lst, mpz_t res)
{
int i;
lst_p l;
mpz_t n;
mpz_init (n);
mpz_set_si (res, 0);
FOR_EACH_VEC_ELT (lst_p, LST_SEQ (lst), i, l)
if (LST_LOOP_P (l))
{
lst_linearized_niter (l, n);
mpz_add (res, res, n);
}
if (LST_LOOP_P (lst))
{
lst_niter_for_loop (lst, n);
if (mpz_cmp_si (res, 0) != 0)
mpz_mul (res, res, n);
else
mpz_set (res, n);
}
mpz_clear (n);
}
/* Applies the translation "f (x) = x + OFFSET" to the loop containing
STMT. */
static void
lst_offset (lst_p stmt, mpz_t offset)
{
lst_p inner = LST_LOOP_FATHER (stmt);
poly_bb_p pbb = LST_PBB (stmt);
ppl_Polyhedron_t poly = PBB_TRANSFORMED_SCATTERING (pbb);
int inner_depth = lst_depth (inner);
ppl_dimension_type inner_dim = psct_dynamic_dim (pbb, inner_depth);
ppl_Linear_Expression_t expr;
ppl_dimension_type dim;
ppl_Coefficient_t one;
mpz_t x;
mpz_init (x);
mpz_set_si (x, 1);
ppl_new_Coefficient (&one);
ppl_assign_Coefficient_from_mpz_t (one, x);
ppl_Polyhedron_space_dimension (poly, &dim);
ppl_new_Linear_Expression_with_dimension (&expr, dim);
ppl_set_coef (expr, inner_dim, 1);
ppl_set_inhomogeneous_gmp (expr, offset);
ppl_Polyhedron_affine_image (poly, inner_dim, expr, one);
ppl_delete_Linear_Expression (expr);
ppl_delete_Coefficient (one);
}
/* Scale by FACTOR the loop LST containing STMT. */
static void
lst_scale (lst_p lst, lst_p stmt, mpz_t factor)
{
mpz_t x;
ppl_Coefficient_t one;
int outer_depth = lst_depth (lst);
poly_bb_p pbb = LST_PBB (stmt);
ppl_Polyhedron_t poly = PBB_TRANSFORMED_SCATTERING (pbb);
ppl_dimension_type outer_dim = psct_dynamic_dim (pbb, outer_depth);
ppl_Linear_Expression_t expr;
ppl_dimension_type dim;
mpz_init (x);
mpz_set_si (x, 1);
ppl_new_Coefficient (&one);
ppl_assign_Coefficient_from_mpz_t (one, x);
ppl_Polyhedron_space_dimension (poly, &dim);
ppl_new_Linear_Expression_with_dimension (&expr, dim);
/* outer_dim = factor * outer_dim. */
ppl_set_coef_gmp (expr, outer_dim, factor);
ppl_Polyhedron_affine_image (poly, outer_dim, expr, one);
ppl_delete_Linear_Expression (expr);
mpz_clear (x);
ppl_delete_Coefficient (one);
}
/* Project the INNER loop into the iteration domain of the OUTER loop.
STRIDE is the number of iterations between two iterations of the
outer loop. */
static void
lst_project_loop (lst_p outer, lst_p inner, mpz_t stride)
{
int i;
lst_p stmt;
mpz_t x;
ppl_Coefficient_t one;
int outer_depth = lst_depth (outer);
int inner_depth = lst_depth (inner);
mpz_init (x);
mpz_set_si (x, 1);
ppl_new_Coefficient (&one);
ppl_assign_Coefficient_from_mpz_t (one, x);
FOR_EACH_VEC_ELT (lst_p, LST_SEQ (inner), i, stmt)
{
poly_bb_p pbb = LST_PBB (stmt);
ppl_Polyhedron_t poly = PBB_TRANSFORMED_SCATTERING (pbb);
ppl_dimension_type outer_dim = psct_dynamic_dim (pbb, outer_depth);
ppl_dimension_type inner_dim = psct_dynamic_dim (pbb, inner_depth);
ppl_Linear_Expression_t expr;
ppl_dimension_type dim;
ppl_dimension_type *ds;
/* There should be no loops under INNER. */
gcc_assert (!LST_LOOP_P (stmt));
ppl_Polyhedron_space_dimension (poly, &dim);
ppl_new_Linear_Expression_with_dimension (&expr, dim);
/* outer_dim = outer_dim * stride + inner_dim. */
ppl_set_coef (expr, inner_dim, 1);
ppl_set_coef_gmp (expr, outer_dim, stride);
ppl_Polyhedron_affine_image (poly, outer_dim, expr, one);
ppl_delete_Linear_Expression (expr);
/* Project on inner_dim. */
ppl_new_Linear_Expression_with_dimension (&expr, dim - 1);
ppl_Polyhedron_affine_image (poly, inner_dim, expr, one);
ppl_delete_Linear_Expression (expr);
/* Remove inner loop and the static schedule of its body. */
ds = XNEWVEC (ppl_dimension_type, 2);
ds[0] = inner_dim;
ds[1] = inner_dim + 1;
ppl_Polyhedron_remove_space_dimensions (poly, ds, 2);
PBB_NB_SCATTERING_TRANSFORM (pbb) -= 2;
free (ds);
}
mpz_clear (x);
ppl_delete_Coefficient (one);
}
/* Flattens the loop nest LST. Return true when something changed.
OFFSET is used to compute the number of iterations of the outermost
loop before the current LST is executed. */
static bool
lst_flatten_loop (lst_p lst, mpz_t init_offset)
{
int i;
lst_p l;
bool res = false;
mpz_t n, one, offset, stride;
mpz_init (n);
mpz_init (one);
mpz_init (offset);
mpz_init (stride);
mpz_set (offset, init_offset);
mpz_set_si (one, 1);
lst_linearized_niter (lst, stride);
lst_niter_for_loop (lst, n);
mpz_tdiv_q (stride, stride, n);
FOR_EACH_VEC_ELT (lst_p, LST_SEQ (lst), i, l)
if (LST_LOOP_P (l))
{
res = true;
lst_flatten_loop (l, offset);
lst_niter_for_loop (l, n);
lst_project_loop (lst, l, stride);
/* The offset is the number of iterations minus 1, as we want
to execute the next statements at the same iteration as the
last iteration of the loop. */
mpz_sub (n, n, one);
mpz_add (offset, offset, n);
}
else
{
lst_scale (lst, l, stride);
if (mpz_cmp_si (offset, 0) != 0)
lst_offset (l, offset);
}
FOR_EACH_VEC_ELT (lst_p, LST_SEQ (lst), i, l)
if (LST_LOOP_P (l))
lst_remove_loop_and_inline_stmts_in_loop_father (l);
mpz_clear (n);
mpz_clear (one);
mpz_clear (offset);
mpz_clear (stride);
return res;
}
/* Remove all but the first 3 dimensions of the scattering:
- dim0: the static schedule for the loop
- dim1: the dynamic schedule of the loop
- dim2: the static schedule for the loop body. */
static void
remove_unused_scattering_dimensions (lst_p lst)
{
int i;
lst_p stmt;
mpz_t x;
ppl_Coefficient_t one;
mpz_init (x);
mpz_set_si (x, 1);
ppl_new_Coefficient (&one);
ppl_assign_Coefficient_from_mpz_t (one, x);
FOR_EACH_VEC_ELT (lst_p, LST_SEQ (lst), i, stmt)
{
poly_bb_p pbb = LST_PBB (stmt);
ppl_Polyhedron_t poly = PBB_TRANSFORMED_SCATTERING (pbb);
int j, nb_dims_to_remove = PBB_NB_SCATTERING_TRANSFORM (pbb) - 3;
ppl_dimension_type *ds;
/* There should be no loops inside LST after flattening. */
gcc_assert (!LST_LOOP_P (stmt));
if (!nb_dims_to_remove)
continue;
ds = XNEWVEC (ppl_dimension_type, nb_dims_to_remove);
for (j = 0; j < nb_dims_to_remove; j++)
ds[j] = j + 3;
ppl_Polyhedron_remove_space_dimensions (poly, ds, nb_dims_to_remove);
PBB_NB_SCATTERING_TRANSFORM (pbb) -= nb_dims_to_remove;
free (ds);
}
mpz_clear (x);
ppl_delete_Coefficient (one);
}
/* Flattens all the loop nests of LST. Return true when something
changed. */
static bool
lst_do_flatten (lst_p lst)
{
int i;
lst_p l;
bool res = false;
mpz_t zero;
if (!lst
|| !LST_LOOP_P (lst))
return false;
mpz_init (zero);
mpz_set_si (zero, 0);
FOR_EACH_VEC_ELT (lst_p, LST_SEQ (lst), i, l)
if (LST_LOOP_P (l))
{
res |= lst_flatten_loop (l, zero);
remove_unused_scattering_dimensions (l);
}
lst_update_scattering (lst);
mpz_clear (zero);
return res;
}
/* Flatten all the loop nests in SCOP. Returns true when something
changed. */
bool
flatten_all_loops (scop_p scop)
{
return lst_do_flatten (SCOP_TRANSFORMED_SCHEDULE (scop));
}
#endif
|