Halide 21.0.0
Halide compiler and libraries
Loading...
Searching...
No Matches
FunctionDAG.h
Go to the documentation of this file.
1/** This file defines the class FunctionDAG, which is our
2 * representation of a Halide pipeline, and contains methods to using
3 * Halide's bounds tools to query properties of it. */
4
5#ifndef FUNCTION_DAG_H
6#define FUNCTION_DAG_H
7
8#include <algorithm>
9#include <cstdint>
10#include <map>
11#include <string>
12#include <utility>
13
14#include <vector>
15
16#include "Featurization.h"
17#include "HalidePlugin.h"
18
19namespace Halide {
20namespace Internal {
21namespace Autoscheduler {
22
23using std::map;
24using std::pair;
25using std::string;
26using std::unique_ptr;
27using std::vector;
28
29struct Adams2019Params;
30
31// First we have various utility classes.
32
33// An optional rational type used when analyzing memory dependencies.
35 bool exists = false;
37
38 OptionalRational() = default;
40 : exists(e), numerator(n), denominator(d) {
41 }
42
43 void operator+=(const OptionalRational &other) {
44 if (!exists || !other.exists) {
45 exists = false;
46 return;
47 }
48 if (denominator == other.denominator) {
49 numerator += other.numerator;
50 return;
51 }
52
55 denominator = l;
56 numerator += other.numerator * (l / other.denominator);
58 numerator /= g;
59 denominator /= g;
60 }
61
63 if ((*this) == 0) {
64 return *this;
65 }
66 if (other == 0) {
67 return other;
68 }
69 int64_t num = numerator * other.numerator;
70 int64_t den = denominator * other.denominator;
71 bool e = exists && other.exists;
72 return OptionalRational{e, num, den};
73 }
74
75 // Because this type is optional (exists may be false), we don't
76 // have a total ordering. These methods all return false when the
77 // operators are not comparable, so a < b is not the same as !(a
78 // >= b).
79 bool operator<(int x) const {
80 if (!exists) {
81 return false;
82 }
83 if (denominator > 0) {
84 return numerator < x * denominator;
85 } else {
86 return numerator > x * denominator;
87 }
88 }
89
90 bool operator<=(int x) const {
91 if (!exists) {
92 return false;
93 }
94 if (denominator > 0) {
95 return numerator <= x * denominator;
96 } else {
97 return numerator >= x * denominator;
98 }
99 }
100
101 bool operator>(int x) const {
102 if (!exists) {
103 return false;
104 }
105 return !((*this) <= x);
106 }
107
108 bool operator>=(int x) const {
109 if (!exists) {
110 return false;
111 }
112 return !((*this) < x);
113 }
114
115 bool operator==(int x) const {
116 return exists && (numerator == (x * denominator));
117 }
118
119 bool operator==(const OptionalRational &other) const {
120 return (exists == other.exists) && (numerator * other.denominator == denominator * other.numerator);
121 }
122};
123
124// A LoadJacobian records the derivative of the coordinate accessed in
125// some producer w.r.t the loops of the consumer.
127 vector<vector<OptionalRational>> coeffs;
128 int64_t c;
129
130public:
131 explicit LoadJacobian(vector<vector<OptionalRational>> &&matrix, int64_t c = 1)
132 : coeffs(matrix), c(c) {
133 }
134
135 size_t producer_storage_dims() const {
136 return coeffs.size();
137 }
138
139 size_t consumer_loop_dims() const {
140 if (coeffs.empty() || coeffs[0].empty()) {
141 // The producer is scalar, and we don't know how
142 // many consumer loops there are.
143 return 0;
144 }
145 return coeffs[0].size();
146 }
147
148 OptionalRational operator()(int producer_storage_dim, int consumer_loop_dim) const {
149 if (coeffs.empty()) {
150 // The producer is scalar, so all strides are zero.
151 return {true, 0, 1};
152 }
153 internal_assert(producer_storage_dim < (int)coeffs.size());
154 const auto &p = coeffs[producer_storage_dim];
155 if (p.empty()) {
156 // The consumer is scalar, so all strides are zero.
157 return {true, 0, 1};
158 }
159 internal_assert(consumer_loop_dim < (int)p.size());
160 return p[consumer_loop_dim];
161 }
162
163 // To avoid redundantly re-recording copies of the same
164 // load Jacobian, we keep a count of how many times a
165 // load with this Jacobian occurs.
166 int64_t count() const {
167 return c;
168 }
169
170 // Try to merge another LoadJacobian into this one, increasing the
171 // count if the coefficients match.
172 bool merge(const LoadJacobian &other) {
173 if (other.coeffs.size() != coeffs.size()) {
174 return false;
175 }
176 for (size_t i = 0; i < coeffs.size(); i++) {
177 if (other.coeffs[i].size() != coeffs[i].size()) {
178 return false;
179 }
180 for (size_t j = 0; j < coeffs[i].size(); j++) {
181 if (!(other.coeffs[i][j] == coeffs[i][j])) {
182 return false;
183 }
184 }
185 }
186 c += other.count();
187 return true;
188 }
189
190 // Multiply Jacobians, used to look at memory dependencies through
191 // inlined functions.
192 LoadJacobian operator*(const LoadJacobian &other) const {
193 vector<vector<OptionalRational>> matrix;
195 matrix.resize(producer_storage_dims());
196 for (size_t i = 0; i < producer_storage_dims(); i++) {
197 matrix[i].resize(other.consumer_loop_dims());
198 for (size_t j = 0; j < other.consumer_loop_dims(); j++) {
199 matrix[i][j] = OptionalRational{true, 0, 1};
200 for (size_t k = 0; k < consumer_loop_dims(); k++) {
201 matrix[i][j] += (*this)(i, k) * other(k, j);
202 }
203 }
204 }
205 LoadJacobian result(std::move(matrix), count() * other.count());
206 return result;
207 }
208
209 void dump(std::ostream &os, const char *prefix) const;
210};
211
212// Classes to represent a concrete set of bounds for a Func. A Span is
213// single-dimensional, and a Bound is a multi-dimensional box. For
214// each dimension we track the estimated size, and also whether or not
215// the size is known to be constant at compile-time. For each Func we
216// track three different types of bounds:
217
218// 1) The region required by consumers of the Func, which determines
219// 2) The region actually computed, which in turn determines
220// 3) The min and max of all loops in the loop next.
221
222// 3 in turn determines the region required of the inputs to a Func,
223// which determines their region computed, and hence their loop nest,
224// and so on back up the Function DAG from outputs back to inputs.
225
226class Span {
227 int64_t min_, max_;
228 bool constant_extent_;
229
230public:
231 int64_t min() const {
232 return min_;
233 }
234 int64_t max() const {
235 return max_;
236 }
237 int64_t extent() const {
238 return max_ - min_ + 1;
239 }
240 bool constant_extent() const {
241 return constant_extent_;
242 }
243
244 void union_with(const Span &other) {
245 min_ = std::min(min_, other.min());
246 max_ = std::max(max_, other.max());
247 constant_extent_ = constant_extent_ && other.constant_extent();
248 }
249
251 max_ = min_ + e - 1;
252 }
253
255 min_ += x;
256 max_ += x;
257 }
258
259 Span(int64_t a, int64_t b, bool c)
260 : min_(a), max_(b), constant_extent_(c) {
261 }
262 Span() = default;
263 Span(const Span &other) = default;
264 static Span empty_span() {
265 return Span(INT64_MAX, INT64_MIN, true);
266 }
267};
268
269// Bounds objects are created and destroyed very frequently while
270// exploring scheduling options, so we have a custom allocator and
271// memory pool. Much like IR nodes, we treat them as immutable once
272// created and wrapped in a Bound object so that they can be shared
273// safely across scheduling alternatives.
276
277 class Layout;
278 const Layout *layout = nullptr;
279
280 Span *data() const {
281 // This struct is a header
282 return (Span *)(const_cast<BoundContents *>(this) + 1);
283 }
284
286 return data()[i];
287 }
288
290 return data()[i + layout->computed_offset];
291 }
292
293 Span &loops(int i, int j) {
294 return data()[j + layout->loop_offset[i]];
295 }
296
297 const Span &region_required(int i) const {
298 return data()[i];
299 }
300
301 const Span &region_computed(int i) const {
302 return data()[i + layout->computed_offset];
303 }
304
305 const Span &loops(int i, int j) const {
306 return data()[j + layout->loop_offset[i]];
307 }
308
310 auto *b = layout->make();
311 size_t bytes = sizeof(data()[0]) * layout->total_size;
312 memcpy(b->data(), data(), bytes);
313 return b;
314 }
315
316 void validate() const;
317
318 // We're frequently going to need to make these concrete bounds
319 // arrays. It makes things more efficient if we figure out the
320 // memory layout of those data structures once ahead of time, and
321 // make each individual instance just use that. Note that this is
322 // not thread-safe.
323 class Layout {
324 // A memory pool of free BoundContent objects with this layout
325 mutable std::vector<BoundContents *> pool;
326
327 // All the blocks of memory allocated
328 mutable std::vector<void *> blocks;
329
330 mutable size_t num_live = 0;
331
332 void allocate_some_more() const;
333
334 public:
335 // number of Span to allocate
337
338 // region_required has size func->dimensions() and comes first in the memory layout
339
340 // region_computed comes next at the following index
342
343 // the loop for each stage starts at the following index
344 std::vector<int> loop_offset;
345
346 Layout() = default;
348
349 Layout(const Layout &) = delete;
350 void operator=(const Layout &) = delete;
351 Layout(Layout &&) = delete;
352 void operator=(Layout &&) = delete;
353
354 // Make a BoundContents object with this layout
356
357 // Release a BoundContents object with this layout back to the pool
358 void release(const BoundContents *b) const;
359 };
360};
361
363
364// A representation of the function DAG. The nodes and edges are both
365// in reverse realization order, so if you want to walk backwards up
366// the DAG, just iterate the nodes or edges in-order.
368
369 // An edge is a producer-consumer relationship
370 struct Edge;
371
376
377 // A Node represents a single Func
378 struct Node {
379 // A pointer back to the owning DAG
381
382 // The Halide Func this represents
384
385 // The number of bytes per point stored.
387
388 // The min/max variables used to denote a symbolic region of
389 // this Func. Used in the cost above, and in the Edges below.
390 vector<SymbolicInterval> region_required;
391
392 // A concrete region required from a bounds estimate. Only
393 // defined for outputs.
395
396 // The region computed of a Func, in terms of the region
397 // required. For simple Funcs this is identical to the
398 // region_required. However, in some Funcs computing one
399 // output requires computing other outputs too. You can't
400 // really ask for a single output pixel from something blurred
401 // with an IIR without computing the others, for example.
403 // The min and max in their full symbolic glory. We use
404 // these in the general case.
407
408 // Analysis used to accelerate common cases
411 };
412 vector<RegionComputedInfo> region_computed;
414
415 // Expand a region required into a region computed, using the
416 // symbolic intervals above.
417 void required_to_computed(const Span *required, Span *computed) const;
418
419 // Metadata about one symbolic loop in a Func's default loop nest.
420 struct Loop {
421 string var;
422 bool pure, rvar;
424
425 // Which pure dimension does this loop correspond to? Invalid if it's an rvar
427
428 // Precomputed metadata to accelerate common cases:
429
430 // If true, the loop bounds are just the region computed in the given dimension
433
434 // If true, the loop bounds are a constant with the given min and max
437
438 // A persistent fragment of source for getting this Var
439 // from its owner Func. Used for printing source code
440 // equivalent to a computed schedule.
441 string accessor;
442 };
443
444 // Get the loop nest shape as a function of the region computed
445 void loop_nest_for_region(int stage_idx, const Span *computed, Span *loop) const;
446
447 // One stage of a Func
448 struct Stage {
449 // The owning Node
451
452 // Which stage of the Func is this. 0 = pure.
453 int index;
454
455 // The loop nest that computes this stage, from innermost out.
456 vector<Loop> loop;
458
459 // The vectorization width that will be used for
460 // compute. Corresponds to the natural width for the
461 // narrowest type used.
463
464 // The featurization of the compute done
466
467 // The actual Halide front-end stage object
469
470 // The name for scheduling (e.g. "foo.update(3)")
471 string name;
472
473 // Ids for perfect hashing on stages.
474 int id, max_id;
475
476 vector<Edge *> incoming_edges;
477
478 vector<bool> dependencies;
479 bool downstream_of(const Node &n) const {
480 return dependencies[n.id];
481 };
482
484 : stage(std::move(s)) {
485 }
486 };
487 vector<Stage> stages;
488
489 vector<Edge *> outgoing_edges;
490
491 // Max vector size across the stages
493
494 // A unique ID for this node, allocated consecutively starting
495 // at zero for each pipeline.
496 int id, max_id;
497
498 // Just func->dimensions(), but we ask for it so many times
499 // that's it's worth avoiding the function call into
500 // libHalide.
502
503 // Is a single pointwise call to another Func
505
506 // We represent the input buffers as node, though we do not attempt to schedule them.
508
509 // Is one of the pipeline outputs
511
512 // Only uses pointwise calls
514
515 // Only uses pointwise calls + clamping on all indices
517
518 std::unique_ptr<BoundContents::Layout> bounds_memory_layout;
519
521 return bounds_memory_layout->make();
522 }
523 };
524
525 // A representation of a producer-consumer relationship
526 struct Edge {
527 struct BoundInfo {
528 // The symbolic expression for the bound in this dimension
530
531 // Fields below are the results of additional analysis
532 // used to evaluate this bound more quickly.
536
537 BoundInfo(const Expr &e, const Node::Stage &consumer, bool dependent);
538 };
539
540 // Memory footprint on producer required by consumer.
541 vector<pair<BoundInfo, BoundInfo>> bounds;
542
545
546 // The number of calls the consumer makes to the producer, per
547 // point in the loop nest of the consumer.
548 int calls;
549
551
552 vector<LoadJacobian> load_jacobians;
553
555
556 // Given a loop nest of the consumer stage, expand a region
557 // required of the producer to be large enough to include all
558 // points required.
559 void expand_footprint(const Span *consumer_loop, Span *producer_required) const;
560 };
561
562 vector<Node> nodes;
563 vector<Edge> edges;
564
565 // Create the function DAG, and do all the dependency and cost
566 // analysis. This is done once up-front before the tree search.
567 FunctionDAG(const vector<Function> &outputs, const Target &target);
568
569 void dump(std::ostream &os) const;
570
571private:
572 // Compute the featurization for the entire DAG
573 void featurize();
574
575public:
576 // This class uses a lot of internal pointers, so we'll make it uncopyable/unmovable.
577 FunctionDAG(const FunctionDAG &other) = delete;
578 FunctionDAG &operator=(const FunctionDAG &other) = delete;
579 FunctionDAG(FunctionDAG &&other) = delete;
581};
582
583} // namespace Autoscheduler
584} // namespace Internal
585} // namespace Halide
586
587#endif // FUNCTION_DAG_H
#define internal_assert(c)
Definition Error.h:218
void release(const BoundContents *b) const
OptionalRational operator()(int producer_storage_dim, int consumer_loop_dim) const
LoadJacobian operator*(const LoadJacobian &other) const
bool merge(const LoadJacobian &other)
LoadJacobian(vector< vector< OptionalRational > > &&matrix, int64_t c=1)
void dump(std::ostream &os, const char *prefix) const
Span(int64_t a, int64_t b, bool c)
Span(const Span &other)=default
void union_with(const Span &other)
A reference-counted handle to Halide's internal representation of a function.
Definition Function.h:39
A class representing a reference count to be used with IntrusivePtr.
A single definition of a Func.
Definition Func.h:70
A Halide variable, to be used when defining functions.
Definition Var.h:19
IntrusivePtr< const BoundContents > Bound
int64_t gcd(int64_t, int64_t)
The greatest common divisor of two integers.
int64_t lcm(int64_t, int64_t)
The least common multiple of two integers.
This file defines the class FunctionDAG, which is our representation of a Halide pipeline,...
signed __INT64_TYPE__ int64_t
void * memcpy(void *s1, const void *s2, size_t n)
A fragment of Halide syntax.
Definition Expr.h:258
const Span & loops(int i, int j) const
BoundInfo(const Expr &e, const Node::Stage &consumer, bool dependent)
void expand_footprint(const Span *consumer_loop, Span *producer_required) const
vector< pair< BoundInfo, BoundInfo > > bounds
void loop_nest_for_region(int stage_idx, const Span *computed, Span *loop) const
std::unique_ptr< BoundContents::Layout > bounds_memory_layout
void required_to_computed(const Span *required, Span *computed) const
FunctionDAG(FunctionDAG &&other)=delete
FunctionDAG(const FunctionDAG &other)=delete
FunctionDAG & operator=(FunctionDAG &&other)=delete
FunctionDAG & operator=(const FunctionDAG &other)=delete
FunctionDAG(const vector< Function > &outputs, const Target &target)
void operator+=(const OptionalRational &other)
Definition FunctionDAG.h:43
OptionalRational(bool e, int64_t n, int64_t d)
Definition FunctionDAG.h:39
bool operator==(const OptionalRational &other) const
OptionalRational operator*(const OptionalRational &other) const
Definition FunctionDAG.h:62
A class to represent ranges of Exprs.
Definition Interval.h:14
Intrusive shared pointers have a reference count (a RefCount object) stored in the class itself.
A struct representing a target machine and os to generate code for.
Definition Target.h:19