Halide 19.0.0
Halide compiler and libraries
FunctionDAG.h
/** This file defines the class FunctionDAG, which is our
 * representation of a Halide pipeline, and contains methods that use
 * Halide's bounds tools to query properties of it. */

#ifndef FUNCTION_DAG_H
#define FUNCTION_DAG_H

#include <algorithm>
#include <cstdint>
#include <map>
#include <string>
#include <utility>
#include <vector>

#include "Errors.h"
#include "Featurization.h"
#include "Halide.h"

namespace Halide {
namespace Internal {
namespace Autoscheduler {

using std::map;
using std::pair;
using std::string;
using std::unique_ptr;
using std::vector;

struct Adams2019Params;

// First we have various utility classes.

// An optional rational type used when analyzing memory dependencies.
struct OptionalRational {
    bool exists = false;
    int64_t numerator = 0, denominator = 0;

    OptionalRational() = default;
    OptionalRational(bool e, int64_t n, int64_t d)
        : exists(e), numerator(n), denominator(d) {
    }

    void operator+=(const OptionalRational &other) {
        if (!exists || !other.exists) {
            exists = false;
            return;
        }
        if (denominator == other.denominator) {
            numerator += other.numerator;
            return;
        }

        int64_t l = lcm(denominator, other.denominator);
        numerator *= l / denominator;
        denominator = l;
        numerator += other.numerator * (l / other.denominator);
        int64_t g = gcd(numerator, denominator);
        numerator /= g;
        denominator /= g;
    }

    OptionalRational operator*(const OptionalRational &other) const {
        if ((*this) == 0) {
            return *this;
        }
        if (other == 0) {
            return other;
        }
        int64_t num = numerator * other.numerator;
        int64_t den = denominator * other.denominator;
        bool e = exists && other.exists;
        return OptionalRational{e, num, den};
    }

    // Because this type is optional (exists may be false), we don't
    // have a total ordering. These methods all return false when the
    // operands are not comparable, so a < b is not the same as
    // !(a >= b).
    bool operator<(int x) const {
        if (!exists) {
            return false;
        }
        if (denominator > 0) {
            return numerator < x * denominator;
        } else {
            return numerator > x * denominator;
        }
    }

    bool operator<=(int x) const {
        if (!exists) {
            return false;
        }
        if (denominator > 0) {
            return numerator <= x * denominator;
        } else {
            return numerator >= x * denominator;
        }
    }

    bool operator>(int x) const {
        if (!exists) {
            return false;
        }
        return !((*this) <= x);
    }

    bool operator>=(int x) const {
        if (!exists) {
            return false;
        }
        return !((*this) < x);
    }

    bool operator==(int x) const {
        return exists && (numerator == (x * denominator));
    }

    bool operator==(const OptionalRational &other) const {
        return (exists == other.exists) && (numerator * other.denominator == denominator * other.numerator);
    }
};
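
// Illustrative sketch, not part of the original header. Because any
// comparison involving a non-existent rational returns false in every
// direction, !(a < b) must not be rewritten as (a >= b). The helper
// below is hypothetical and exists only for exposition.
inline void optional_rational_partial_order_example() {
    OptionalRational unknown;           // exists == false
    internal_assert(!(unknown < 1));    // incomparable, so false...
    internal_assert(!(unknown >= 1));   // ...and so is the would-be negation
    OptionalRational half{true, 1, 2};  // the rational 1/2
    internal_assert(half < 1);
    internal_assert(half > 0);
}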

// A LoadJacobian records the derivative of the coordinate accessed in
// some producer w.r.t. the loops of the consumer.
class LoadJacobian {
    vector<vector<OptionalRational>> coeffs;
    int64_t c;

public:
    explicit LoadJacobian(vector<vector<OptionalRational>> &&matrix, int64_t c = 1)
        : coeffs(std::move(matrix)), c(c) {
    }

    size_t producer_storage_dims() const {
        return coeffs.size();
    }

    size_t consumer_loop_dims() const {
        if (coeffs.empty() || coeffs[0].empty()) {
            // The producer is scalar, and we don't know how
            // many consumer loops there are.
            return 0;
        }
        return coeffs[0].size();
    }

    OptionalRational operator()(int producer_storage_dim, int consumer_loop_dim) const {
        if (coeffs.empty()) {
            // The producer is scalar, so all strides are zero.
            return {true, 0, 1};
        }
        internal_assert(producer_storage_dim < (int)coeffs.size());
        const auto &p = coeffs[producer_storage_dim];
        if (p.empty()) {
            // The consumer is scalar, so all strides are zero.
            return {true, 0, 1};
        }
        internal_assert(consumer_loop_dim < (int)p.size());
        return p[consumer_loop_dim];
    }

    // To avoid redundantly re-recording copies of the same
    // load Jacobian, we keep a count of how many times a
    // load with this Jacobian occurs.
    int64_t count() const {
        return c;
    }

    // Try to merge another LoadJacobian into this one, increasing the
    // count if the coefficients match.
    bool merge(const LoadJacobian &other) {
        if (other.coeffs.size() != coeffs.size()) {
            return false;
        }
        for (size_t i = 0; i < coeffs.size(); i++) {
            if (other.coeffs[i].size() != coeffs[i].size()) {
                return false;
            }
            for (size_t j = 0; j < coeffs[i].size(); j++) {
                if (!(other.coeffs[i][j] == coeffs[i][j])) {
                    return false;
                }
            }
        }
        c += other.count();
        return true;
    }

    // Multiply Jacobians, used to look at memory dependencies through
    // inlined functions.
    LoadJacobian operator*(const LoadJacobian &other) const {
        vector<vector<OptionalRational>> matrix;
        internal_assert(consumer_loop_dims() == 0 || (consumer_loop_dims() == other.producer_storage_dims()));
        matrix.resize(producer_storage_dims());
        for (size_t i = 0; i < producer_storage_dims(); i++) {
            matrix[i].resize(other.consumer_loop_dims());
            for (size_t j = 0; j < other.consumer_loop_dims(); j++) {
                matrix[i][j] = OptionalRational{true, 0, 1};
                for (size_t k = 0; k < consumer_loop_dims(); k++) {
                    matrix[i][j] += (*this)(i, k) * other(k, j);
                }
            }
        }
        LoadJacobian result(std::move(matrix), count() * other.count());
        return result;
    }

    void dump(std::ostream &os, const char *prefix) const;
};
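
// Illustrative sketch, not part of the original header. For a consumer
// with loops (x, y) loading f(2*x, y + 7), the Jacobian holds the
// derivative of each producer coordinate w.r.t. each consumer loop;
// constant shifts like "+ 7" do not appear. The helper below is
// hypothetical and exists only for exposition.
inline LoadJacobian example_load_jacobian() {
    vector<vector<OptionalRational>> m(2);
    m[0] = {{true, 2, 1}, {true, 0, 1}};   // d(2*x)/dx = 2, d(2*x)/dy = 0
    m[1] = {{true, 0, 1}, {true, 1, 1}};   // d(y+7)/dx = 0, d(y+7)/dy = 1
    return LoadJacobian(std::move(m), 1);  // count starts at 1
}
// Merging an identical Jacobian via merge() would bump count() to 2
// rather than recording a duplicate copy.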

// Classes to represent a concrete set of bounds for a Func. A Span is
// single-dimensional, and a Bound is a multi-dimensional box. For
// each dimension we track the estimated size, and also whether or not
// the size is known to be constant at compile-time. For each Func we
// track three different types of bounds:

// 1) The region required by consumers of the Func, which determines
// 2) The region actually computed, which in turn determines
// 3) The min and max of all loops in the loop nest.

// 3) in turn determines the region required of the inputs to a Func,
// which determines their region computed, and hence their loop nest,
// and so on back up the function DAG from outputs back to inputs.

class Span {
    int64_t min_, max_;
    bool constant_extent_;

public:
    int64_t min() const {
        return min_;
    }
    int64_t max() const {
        return max_;
    }
    int64_t extent() const {
        return max_ - min_ + 1;
    }
    bool constant_extent() const {
        return constant_extent_;
    }

    void union_with(const Span &other) {
        min_ = std::min(min_, other.min());
        max_ = std::max(max_, other.max());
        constant_extent_ = constant_extent_ && other.constant_extent();
    }

    void set_extent(int64_t e) {
        max_ = min_ + e - 1;
    }

    void translate(int64_t x) {
        min_ += x;
        max_ += x;
    }

    Span(int64_t a, int64_t b, bool c)
        : min_(a), max_(b), constant_extent_(c) {
    }
    Span() = default;
    Span(const Span &other) = default;
    static Span empty_span() {
        return Span(INT64_MAX, INT64_MIN, true);
    }
};
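
// Illustrative sketch, not part of the original header. Spans are
// closed intervals, so extent() == max() - min() + 1, and empty_span()
// is the identity for union_with(). The helper below is hypothetical
// and exists only for exposition.
inline Span example_span_union() {
    Span s = Span::empty_span();
    s.union_with(Span(0, 9, true));    // now [0, 9], constant extent 10
    s.union_with(Span(5, 20, false));  // now [0, 20], extent 21, no longer constant
    return s;
}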

// Bounds objects are created and destroyed very frequently while
// exploring scheduling options, so we have a custom allocator and
// memory pool. Much like IR nodes, we treat them as immutable once
// created and wrapped in a Bound object so that they can be shared
// safely across scheduling alternatives.
struct BoundContents {
    mutable RefCount ref_count;

    class Layout;
    const Layout *layout = nullptr;

    Span *data() const {
        // This struct is a header; the Span data is allocated
        // immediately after it.
        return (Span *)(const_cast<BoundContents *>(this) + 1);
    }

    Span &region_required(int i) {
        return data()[i];
    }

    Span &region_computed(int i) {
        return data()[i + layout->computed_offset];
    }

    Span &loops(int i, int j) {
        return data()[j + layout->loop_offset[i]];
    }

    const Span &region_required(int i) const {
        return data()[i];
    }

    const Span &region_computed(int i) const {
        return data()[i + layout->computed_offset];
    }

    const Span &loops(int i, int j) const {
        return data()[j + layout->loop_offset[i]];
    }

    BoundContents *make_copy() const {
        auto *b = layout->make();
        size_t bytes = sizeof(data()[0]) * layout->total_size;
        memcpy(b->data(), data(), bytes);
        return b;
    }

    void validate() const;

    // We're frequently going to need to make these concrete bounds
    // arrays. It makes things more efficient if we figure out the
    // memory layout of those data structures once ahead of time, and
    // make each individual instance just use that. Note that this is
    // not thread-safe.
    class Layout {
        // A memory pool of free BoundContents objects with this layout
        mutable std::vector<BoundContents *> pool;

        // All the blocks of memory allocated
        mutable std::vector<void *> blocks;

        mutable size_t num_live = 0;

        void allocate_some_more() const;

    public:
        // The number of Spans to allocate
        int total_size;

        // region_required has size func->dimensions() and comes first in the memory layout

        // region_computed comes next at the following index
        int computed_offset;

        // the loop for each stage starts at the following index
        std::vector<int> loop_offset;

        Layout() = default;
        ~Layout();

        Layout(const Layout &) = delete;
        void operator=(const Layout &) = delete;
        Layout(Layout &&) = delete;
        void operator=(Layout &&) = delete;

        // Make a BoundContents object with this layout
        BoundContents *make() const;

        // Release a BoundContents object with this layout back to the pool
        void release(const BoundContents *b) const;
    };
};
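
// Illustrative sketch, not part of the original header. For a 2D Func
// with a single stage of two loops, a Layout packs the Spans
// back-to-back immediately after the BoundContents header:
//
//     data()[0..1]  region_required  (func->dimensions() entries)
//     data()[2..3]  region_computed  (computed_offset == 2)
//     data()[4..5]  stage 0 loops    (loop_offset[0] == 4)
//
// so total_size == 6, and loops(0, 1) reads data()[5].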

using Bound = IntrusivePtr<const BoundContents>;

// A representation of the function DAG. The nodes and edges are both
// in reverse realization order, so if you want to walk backwards up
// the DAG, just iterate the nodes or edges in order.
struct FunctionDAG {

    // An edge is a producer-consumer relationship
    struct Edge;

    struct SymbolicInterval {
        Halide::Var min;
        Halide::Var max;
    };

    // A Node represents a single Func
    struct Node {
        // A pointer back to the owning DAG
        FunctionDAG *dag;

        // The Halide Func this represents
        Function func;

        // The number of bytes per point stored.
        double bytes_per_point;

        // The min/max variables used to denote a symbolic region of
        // this Func. Used in the cost above, and in the Edges below.
        vector<SymbolicInterval> region_required;

        // A concrete region required from a bounds estimate. Only
        // defined for outputs.
        vector<Span> estimated_region_required;

        // The region computed of a Func, in terms of the region
        // required. For simple Funcs this is identical to the
        // region_required. However, in some Funcs computing one
        // output requires computing other outputs too. You can't
        // really ask for a single output pixel from something blurred
        // with an IIR without computing the others, for example.
        struct RegionComputedInfo {
            // The min and max in their full symbolic glory. We use
            // these in the general case.
            Interval in;

            // Analysis used to accelerate common cases
            bool equals_required = false, equals_union_of_required_with_constants = false;
            int64_t c_min = 0, c_max = 0;
        };
        vector<RegionComputedInfo> region_computed;
        bool region_computed_all_common_cases = false;

        // Expand a region required into a region computed, using the
        // symbolic intervals above.
        void required_to_computed(const Span *required, Span *computed) const;

        // Metadata about one symbolic loop in a Func's default loop nest.
        struct Loop {
            string var;
            bool pure, rvar;
            Expr min, max;

            // Which pure dimension does this loop correspond to? Invalid if it's an rvar
            int pure_dim;

            // Precomputed metadata to accelerate common cases:

            // If true, the loop bounds are just the region computed in the given dimension
            bool equals_region_computed = false;
            int region_computed_dim = 0;

            // If true, the loop bounds are a constant with the given min and max
            bool bounds_are_constant = false;
            int64_t c_min = 0, c_max = 0;

            // A persistent fragment of source for getting this Var
            // from its owner Func. Used for printing source code
            // equivalent to a computed schedule.
            string accessor;
        };

        // Get the loop nest shape as a function of the region computed
        void loop_nest_for_region(int stage_idx, const Span *computed, Span *loop) const;

        // One stage of a Func
        struct Stage {
            // The owning Node
            Node *node;

            // Which stage of the Func this is. 0 = pure.
            int index;

            // The loop nest that computes this stage, from innermost out.
            vector<Loop> loop;
            bool loop_nest_all_common_cases = false;

            // The vectorization width that will be used for
            // compute. Corresponds to the natural width for the
            // narrowest type used.
            int vector_size;

            // The featurization of the compute done
            PipelineFeatures features;

            // The actual Halide front-end stage object
            Halide::Stage stage;

            // The name for scheduling (e.g. "foo.update(3)")
            string name;

            // Ids for perfect hashing on stages.
            int id, max_id;

            vector<Edge *> incoming_edges;

            vector<bool> dependencies;
            bool downstream_of(const Node &n) const {
                return dependencies[n.id];
            }

            Stage(Halide::Stage s)
                : stage(std::move(s)) {
            }
        };
        vector<Stage> stages;

        vector<Edge *> outgoing_edges;

        // Max vector size across the stages
        int vector_size;

        // A unique ID for this node, allocated consecutively starting
        // at zero for each pipeline.
        int id, max_id;

        // Just func->dimensions(), but we ask for it so many times
        // that it's worth avoiding the function call into libHalide.
        int dimensions;

        // Is a single pointwise call to another Func
        bool is_wrapper;

        // We represent the input buffers as nodes, though we do not attempt to schedule them.
        bool is_input;

        // Is one of the pipeline outputs
        bool is_output;

        // Only uses pointwise calls
        bool is_pointwise;

        // Only uses pointwise calls + clamping on all indices
        bool is_boundary_condition;

        std::unique_ptr<BoundContents::Layout> bounds_memory_layout;

        BoundContents *make_bound() const {
            return bounds_memory_layout->make();
        }
    };

    // A representation of a producer-consumer relationship
    struct Edge {
        struct BoundInfo {
            // The symbolic expression for the bound in this dimension
            Expr expr;

            // Fields below are the results of additional analysis
            // used to evaluate this bound more quickly.
            int64_t coeff, constant;
            int64_t consumer_dim;
            bool affine, uses_max;

            BoundInfo(const Expr &e, const Node::Stage &consumer, bool dependent);
        };

        // Memory footprint on producer required by consumer.
        vector<pair<BoundInfo, BoundInfo>> bounds;

        FunctionDAG::Node *producer;
        FunctionDAG::Node::Stage *consumer;

        // The number of calls the consumer makes to the producer, per
        // point in the loop nest of the consumer.
        int calls;

        bool all_bounds_affine;

        vector<LoadJacobian> load_jacobians;

        bool all_load_jacobian_coeffs_exist() const;

        // Given a loop nest of the consumer stage, expand a region
        // required of the producer to be large enough to include all
        // points required.
        void expand_footprint(const Span *consumer_loop, Span *producer_required) const;
    };

    vector<Node> nodes;
    vector<Edge> edges;

    // Create the function DAG, and do all the dependency and cost
    // analysis. This is done once up-front before the tree search.
    FunctionDAG(const vector<Function> &outputs, const Target &target);

    void dump(std::ostream &os) const;

private:
    // Compute the featurization for the entire DAG
    void featurize();

public:
    // This class uses a lot of internal pointers, so we'll make it uncopyable/unmovable.
    FunctionDAG(const FunctionDAG &other) = delete;
    FunctionDAG &operator=(const FunctionDAG &other) = delete;
    FunctionDAG(FunctionDAG &&other) = delete;
    FunctionDAG &operator=(FunctionDAG &&other) = delete;
};
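
// Illustrative sketch, not part of the original header. Because nodes
// are stored in reverse realization order, one forward pass visits
// every consumer before any of the producers it depends on. The helper
// below is hypothetical and exists only for exposition.
inline void dump_realization_order(const FunctionDAG &dag, std::ostream &os) {
    for (const auto &node : dag.nodes) {  // outputs first, inputs last
        os << node.func.name() << "\n";
    }
}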

}  // namespace Autoscheduler
}  // namespace Internal
}  // namespace Halide

#endif  // FUNCTION_DAG_H