1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
|
//===- CodegenEnv.h - Code generation environment class ---------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This header file defines the code generation environment class.
//
//===----------------------------------------------------------------------===//
#ifndef MLIR_DIALECT_SPARSETENSOR_TRANSFORMS_CODEGENENV_H_
#define MLIR_DIALECT_SPARSETENSOR_TRANSFORMS_CODEGENENV_H_
#include "CodegenUtils.h"
#include "LoopEmitter.h"
#include "mlir/Dialect/Linalg/IR/Linalg.h"
#include "mlir/Dialect/SparseTensor/IR/SparseTensor.h"
#include "mlir/Dialect/SparseTensor/Transforms/Passes.h"
#include "mlir/Dialect/SparseTensor/Utils/Merger.h"
#include <optional>
namespace mlir {
namespace sparse_tensor {
/// The code generation environment class aggregates a number of data
/// structures that are needed during the code generation phase of
/// sparsification. This environment simplifies passing around such
/// data during sparsification (rather than passing around all the
/// individual components where needed). Furthermore, it provides
/// convenience methods that keep implementation details transparent
/// to sparsification while asserting on internal consistency.
class CodegenEnv {
public:
  /// Constructs a code generation environment which can be
  /// passed around during sparsification for bookkeeping
  /// together with some consistency asserts.
  CodegenEnv(linalg::GenericOp linop, SparsificationOptions opts,
             unsigned numTensors, unsigned numLoops, unsigned numFilterLoops,
             unsigned maxRank);

  //
  // General methods.
  //

  /// Initializes the root tensor expression of the kernel (`tensorExp`);
  /// returns failure when the expression cannot be constructed.
  LogicalResult initTensorExp();
  /// Returns the root tensor expression of the kernel.
  ExprId getExprId() const { return tensorExp; }

  /// Returns the linalg operation being sparsified.
  linalg::GenericOp op() const { return linalgOp; }
  /// Returns the sparsification options in effect.
  const SparsificationOptions &options() const { return sparseOptions; }
  /// Returns the merger helper class (lattice/expression bookkeeping).
  Merger &merger() { return latticeMerger; }
  /// Returns the loop emitter helper class.
  LoopEmitter &emitter() { return loopEmitter; }

  /// Marks the beginning of the code emission phase.
  void startEmit();

  /// Generates loop boundary statements (entering/exiting loops). The function
  /// passes and updates the passed-in parameters.
  std::optional<Operation *>
  genLoopBoundary(function_ref<
                  std::optional<Operation *>(MutableArrayRef<Value> parameters)>
                      callback);

  //
  // Merger delegates.
  //

  constexpr TensorId makeTensorId(unsigned t) const {
    return latticeMerger.makeTensorId(t);
  }
  constexpr LoopId makeLoopId(unsigned i) const {
    return latticeMerger.makeLoopId(i);
  }
  constexpr TensorLoopId makeTensorLoopId(unsigned t, unsigned i) const {
    return latticeMerger.makeTensorLoopId(t, i);
  }
  const TensorExp &exp(ExprId e) const { return latticeMerger.exp(e); }
  const LatPoint &lat(LatPointId l) const { return latticeMerger.lat(l); }
  ArrayRef<LatPointId> set(LatSetId s) const { return latticeMerger.set(s); }
  /// Returns the level-type of tensor `t` at loop `i`.
  DimLevelType dlt(TensorId t, LoopId i) const {
    return latticeMerger.getLvlType(t, i);
  }
  /// Returns the level-type for the given tensor-loop identifier.
  DimLevelType dlt(TensorLoopId b) const { return latticeMerger.getLvlType(b); }

  //
  // LoopEmitter delegates.
  //

  TensorLevel makeTensorLevel(TensorId t, Level l) const {
    // Make sure LoopEmitter, GenericOp, and Merger agree on the number of
    // tensors. Merger has one more synthetic tensor for loop invariants.
    assert(loopEmitter.getNumTensors() == linalgOp->getNumOperands() &&
           loopEmitter.getNumTensors() == latticeMerger.getNumTensors() - 1);
    return loopEmitter.makeTensorLevel(t, l);
  }
  /// Splits a packed tensor-level identifier back into its components.
  std::pair<TensorId, Level> unpackTensorLevel(TensorLevel tl) const {
    return loopEmitter.unpackTensorLevel(tl);
  }
  /// Unpacks a whole range of tensor-level identifiers at once.
  template <class ContainerTy>
  auto unpackTensorLevelRange(ContainerTy &&c) const {
    return loopEmitter.unpackTensorLevelRange(std::forward<ContainerTy>(c));
  }

  //
  // Code generation environment verify functions.
  //

  /// Whether the tensor expression is admissible for codegen.
  /// It also sets the sparseOut if the output tensor is sparse.
  bool isAdmissibleTensorExp(ExprId e);

  /// Whether the iteration graph is sorted in admissible topoOrder.
  /// Sets outerParNest on success with sparse output
  bool isAdmissibleTopoOrder();

  //
  // Topological delegate and sort methods.
  //

  LoopOrd topSortSize() const { return topSort.size(); }
  LoopId topSortAt(LoopOrd n) const { return topSort.at(n); }
  void topSortPushBack(LoopId i) { topSort.push_back(i); }
  // Clears the topological sort, optionally pre-reserving `capacity` slots.
  void topSortClear(size_t capacity = 0) {
    topSort.clear();
    topSort.reserve(capacity);
  }

  ArrayRef<LoopId> getTopSortSlice(LoopOrd n, LoopOrd m) const;
  ArrayRef<LoopId> getLoopStackUpTo(LoopOrd n) const;
  ArrayRef<LoopId> getCurrentLoopStack() const;
  /// Returns the induction-variable for the loop identified by the given
  /// `LoopId`. This method handles application of the topological sort
  /// in order to convert the `LoopId` into the corresponding `LoopOrd`.
  Value getLoopVar(LoopId i) const;

  //
  // Sparse tensor output and expansion methods.
  //

  bool hasSparseOutput() const { return sparseOut != nullptr; }
  bool isSparseOutput(OpOperand *o) const { return sparseOut == o; }

  Value getInsertionChain() const { return insChain; }
  void updateInsertionChain(Value chain);

  // FIXME: clarify what this "rank" is really supposed to mean/be.
  bool atExpandLevel(OpOperand *o, unsigned rank, LoopOrd n) const;
  void startExpand(Value values, Value filled, Value added, Value count);
  // Whether access-pattern expansion is currently active (see `startExpand`).
  bool isExpand() const { return expValues != nullptr; }
  void updateExpandCount(Value count);
  Value getExpandValues() const { return expValues; }
  Value getExpandFilled() const { return expFilled; }
  Value getExpandAdded() const { return expAdded; }
  Value getExpandCount() const { return expCount; }
  void endExpand();

  //
  // Reduction methods.
  //

  void startReduc(ExprId exp, Value val);
  // Whether a scalarized reduction is currently in progress.
  bool isReduc() const { return redExp != detail::kInvalidId; }
  void updateReduc(Value val);
  Value getReduc() const { return redVal; }
  Value endReduc();
  void setValidLexInsert(Value val);
  void clearValidLexInsert();
  Value getValidLexInsert() const { return redValidLexInsert; }

  void startCustomReduc(ExprId exp);
  // Whether a custom (user-defined) reduction is currently in progress.
  bool isCustomReduc() const { return redCustom != detail::kInvalidId; }
  Value getCustomRedId();
  void endCustomReduc();

private:
  // Linalg operation.
  linalg::GenericOp linalgOp;

  // Sparsification options.
  SparsificationOptions sparseOptions;

  // Merger helper class.
  Merger latticeMerger;

  // Loop emitter helper class.
  LoopEmitter loopEmitter;

  // Topological sort. This serves as a mapping from `LoopOrd` to `LoopId`
  // (cf., `getLoopVar` and `topSortAt`).
  std::vector<LoopId> topSort;

  // Sparse tensor as output. Implemented either through direct injective
  // insertion in lexicographic index order or through access pattern
  // expansion in the innermost loop nest (`expValues` through `expCount`).
  OpOperand *sparseOut;
  // The count of outer non-filter loops, as defined by `isAdmissibleTopoOrder`.
  LoopOrd outerParNest;
  Value insChain;
  Value expValues;
  Value expFilled;
  Value expAdded;
  Value expCount;

  // Bookkeeping for reductions (up-to-date value of the reduction, and indices
  // into the merger's expression tree. When the indices of a tensor reduction
  // expression are exhausted, all inner loops can use a scalarized reduction.
  Value redVal;
  ExprId redExp;
  ExprId redCustom;

  // Bookkeeping for lex insertion during reductions. Holds the runtime boolean
  // value of whether any reduction occurred. This is only set during a
  // reduction and cleared once the reduction is finished.
  Value redValidLexInsert;

  // The root tensor expression of the kernel.
  ExprId tensorExp;
};
} // namespace sparse_tensor
} // namespace mlir
#endif // MLIR_DIALECT_SPARSETENSOR_TRANSFORMS_CODEGENENV_H_
|