FunctionDAG.h
/** This file defines the class FunctionDAG, which is our
 * representation of a Halide pipeline, and contains methods that use
 * Halide's bounds tools to query properties of it. */

#ifndef FUNCTION_DAG_H
#define FUNCTION_DAG_H

#include <algorithm>
#include <cstdint>
#include <map>
#include <memory>
#include <string>
#include <utility>
#include <vector>

#include "Errors.h"
#include "Featurization.h"
#include "Halide.h"
#include "PerfectHashMap.h"

namespace Halide {
namespace Internal {
namespace Autoscheduler {

using std::map;
using std::pair;
using std::string;
using std::unique_ptr;
using std::vector;
// First we have various utility classes.

// An optional rational type used when analyzing memory dependencies.
struct OptionalRational {
    int64_t numerator = 0, denominator = 0;

    bool exists() const {
        return denominator != 0;
    }

    OptionalRational() = default;
    OptionalRational(int64_t n, int64_t d)
        : numerator(n), denominator(d) {
    }
    void operator+=(const OptionalRational &other) {
        if ((denominator & other.denominator) == 0) {
            numerator = denominator = 0;
            return;
        }
        if (denominator == other.denominator) {
            numerator += other.numerator;
            return;
        }

        int64_t l = lcm(denominator, other.denominator);
        numerator *= l / denominator;
        denominator = l;
        numerator += other.numerator * (l / other.denominator);
        int64_t g = gcd(numerator, denominator);
        numerator /= g;
        denominator /= g;
    }

    OptionalRational operator*(int64_t factor) const {
        if ((*this) == 0) {
            return *this;
        }
        int64_t num = numerator * factor;
        return OptionalRational{num, denominator};
    }

    OptionalRational operator*(const OptionalRational &other) const {
        if ((*this) == 0) {
            return *this;
        }
        if (other == 0) {
            return other;
        }
        int64_t num = numerator * other.numerator;
        int64_t den = denominator * other.denominator;
        return OptionalRational{num, den};
    }

    // Because this type is optional (exists may be false), we don't
    // have a total ordering. These methods all return false when the
    // operands are not comparable, so a < b is not the same as
    // !(a >= b).
    bool operator<(int x) const {
        if (denominator == 0) {
            return false;
        } else if (denominator > 0) {
            return numerator < x * denominator;
        } else {
            return numerator > x * denominator;
        }
    }

    bool operator<=(int x) const {
        if (denominator == 0) {
            return false;
        } else if (denominator > 0) {
            return numerator <= x * denominator;
        } else {
            return numerator >= x * denominator;
        }
    }

    bool operator>(int x) const {
        if (!exists()) {
            return false;
        }
        return !((*this) <= x);
    }

    bool operator>=(int x) const {
        if (!exists()) {
            return false;
        }
        return !((*this) < x);
    }

    bool operator==(int x) const {
        return exists() && (numerator == (x * denominator));
    }

    bool operator==(const OptionalRational &other) const {
        return (exists() == other.exists()) && (numerator * other.denominator == denominator * other.numerator);
    }
};
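
// A quick sketch of the intended arithmetic, for illustration (using the
// numerator/denominator constructor above):
//
//     OptionalRational a{1, 2}, b{1, 3};  // 1/2 and 1/3
//     a += b;                             // a is now 5/6 (via lcm/gcd)
//     OptionalRational undef{0, 0};       // exists() == false
//     // Both (undef < 1) and (undef >= 1) are false: there is no total order.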

// A LoadJacobian records the derivative of the coordinate accessed in
// some producer w.r.t the loops of the consumer.
class LoadJacobian {
    std::vector<OptionalRational> coeffs;
    int64_t c;
    size_t rows, cols;

public:
    LoadJacobian(size_t producer_storage_dims, size_t consumer_loop_dims, int64_t count)
        : c(count),
          rows(producer_storage_dims),
          cols(consumer_loop_dims) {
        coeffs.resize(rows * cols);
    }

    bool all_coeffs_exist() const {
        for (const auto &coeff : coeffs) {
            if (!coeff.exists()) {
                return false;
            }
        }
        return true;
    }

    bool empty() const {
        return rows == 0;
    }

    size_t producer_storage_dims() const {
        return rows;
    }

    size_t consumer_loop_dims() const {
        return cols;
    }

    bool is_constant() const {
        for (const auto &c : coeffs) {
            if (!c.exists() || !(c == 0)) {
                return false;
            }
        }

        return true;
    }

    OptionalRational operator()(int producer_storage_dim, int consumer_loop_dim) const {
        if (producer_storage_dims() == 0 || consumer_loop_dims() == 0) {
            // The producer or consumer is scalar, so all strides are zero.
            return {0, 1};
        }
        return coeffs[producer_storage_dim * cols + consumer_loop_dim];
    }

    OptionalRational &operator()(int producer_storage_dim, int consumer_loop_dim) {
        return coeffs[producer_storage_dim * cols + consumer_loop_dim];
    }

    // To avoid redundantly re-recording copies of the same
    // load Jacobian, we keep a count of how many times a
    // load with this Jacobian occurs.
    int64_t count() const {
        return c;
    }

    // Try to merge another LoadJacobian into this one, increasing the
    // count if the coefficients match.
    bool merge(const LoadJacobian &other) {
        if (other.rows != rows || other.cols != cols) {
            return false;
        }
        for (size_t i = 0; i < rows * cols; i++) {
            if (!(other.coeffs[i] == coeffs[i])) {
                return false;
            }
        }
        c += other.count();
        return true;
    }

    // Scale the matrix coefficients by the given factors
    LoadJacobian operator*(const std::vector<int64_t> &factors) const {
        LoadJacobian result(rows, cols, c);
        for (size_t i = 0; i < producer_storage_dims(); i++) {
            for (size_t j = 0; j < consumer_loop_dims(); j++) {
                result(i, j) = (*this)(i, j) * factors[j];
            }
        }
        return result;
    }

    // Multiply Jacobians, used to look at memory dependencies through
    // inlined functions.
    LoadJacobian operator*(const LoadJacobian &other) const {
        LoadJacobian result(producer_storage_dims(), other.consumer_loop_dims(), count() * other.count());
        for (size_t i = 0; i < producer_storage_dims(); i++) {
            for (size_t j = 0; j < other.consumer_loop_dims(); j++) {
                result(i, j) = OptionalRational{0, 1};
                for (size_t k = 0; k < consumer_loop_dims(); k++) {
                    result(i, j) += (*this)(i, k) * other(k, j);
                }
            }
        }
        return result;
    }

    void dump(const char *prefix) const;
};
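
// For illustration: for a consumer loop nest over (x, y) that contains the
// load producer(y, x / 2), the Jacobian has one row per producer storage
// dimension and one column per consumer loop dimension:
//
//     d(coord 0)/dx = 0      d(coord 0)/dy = 1
//     d(coord 1)/dx = 1/2    d(coord 1)/dy = 0
//
// A stride that is not expressible as a rational (e.g. producer(x * x)) is
// recorded as an OptionalRational whose exists() is false.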

// Classes to represent a concrete set of bounds for a Func. A Span is
// single-dimensional, and a Bound is a multi-dimensional box. For
// each dimension we track the estimated size, and also whether or not
// the size is known to be constant at compile-time. For each Func we
// track three different types of bounds:

// 1) The region required by consumers of the Func, which determines
// 2) The region actually computed, which in turn determines
// 3) The min and max of all loops in the loop nest.

// 3 in turn determines the region required of the inputs to a Func,
// which determines their region computed, and hence their loop nest,
// and so on back up the Function DAG from outputs back to inputs.
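//
// For illustration: if an output computes out(x) = in(x - 1) + in(x) + in(x + 1)
// over x in [0, 99], the region required of "in" is [-1, 100]. For a simple
// pointwise Func the region computed equals the region required, so the pure
// loop over "in" also spans [-1, 100], which in turn determines the region
// required of anything "in" itself reads.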

class Span {
    int64_t min_, max_;
    bool constant_extent_;

public:
    int64_t min() const {
        return min_;
    }
    int64_t max() const {
        return max_;
    }
    int64_t extent() const {
        return max_ - min_ + 1;
    }
    bool constant_extent() const {
        return constant_extent_;
    }

    void union_with(const Span &other) {
        min_ = std::min(min_, other.min());
        max_ = std::max(max_, other.max());
        constant_extent_ = constant_extent_ && other.constant_extent();
    }

    void set_extent(int64_t e) {
        max_ = min_ + e - 1;
    }

    void translate(int64_t x) {
        min_ += x;
        max_ += x;
    }

    Span(int64_t a, int64_t b, bool c)
        : min_(a),
          max_(b),
          constant_extent_(c) {
    }
    Span() = default;
    Span(const Span &other) = default;
    static Span empty_span() {
        return Span(INT64_MAX, INT64_MIN, true);
    }
};
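
// For illustration: empty_span() is the identity for union_with, so a bounding
// interval can be accumulated like this:
//
//     Span s = Span::empty_span();
//     s.union_with(Span(4, 9, true));   // s covers [4, 9], constant extent
//     s.union_with(Span(0, 7, false));  // s covers [0, 9], extent not constant
//     // s.extent() == 10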

// Bounds objects are created and destroyed very frequently while
// exploring scheduling options, so we have a custom allocator and
// memory pool. Much like IR nodes, we treat them as immutable once
// created and wrapped in a Bound object so that they can be shared
// safely across scheduling alternatives.
struct BoundContents {
    mutable RefCount ref_count;

    class Layout;
    const Layout *layout = nullptr;

    Span *data() const {
        // This struct is a header
        return (Span *)(const_cast<BoundContents *>(this) + 1);
    }

    Span &region_required(int i) {
        return data()[i];
    }

    Span &region_computed(int i) {
        return data()[i + layout->computed_offset];
    }

    Span &loops(int i, int j) {
        return data()[j + layout->loop_offset[i]];
    }

    const Span &region_required(int i) const {
        return data()[i];
    }

    const Span &region_computed(int i) const {
        return data()[i + layout->computed_offset];
    }

    const Span &loops(int i, int j) const {
        return data()[j + layout->loop_offset[i]];
    }

    BoundContents *make_copy() const {
        auto *b = layout->make();
        size_t bytes = sizeof(data()[0]) * layout->total_size;
        memcpy(b->data(), data(), bytes);
        return b;
    }

    void validate() const;

    // We're frequently going to need to make these concrete bounds
    // arrays. It makes things more efficient if we figure out the
    // memory layout of those data structures once ahead of time, and
    // make each individual instance just use that. Note that this is
    // not thread-safe.
    class Layout {
        // A memory pool of free BoundContents objects with this layout
        mutable std::vector<BoundContents *> pool;

        // All the blocks of memory allocated
        mutable std::vector<void *> blocks;

        mutable size_t num_live = 0;

        void allocate_some_more() const;

    public:
        // The number of Spans to allocate
        int total_size;

        // region_computed comes next at the following index
        int computed_offset;

        // The loop for each stage starts at the following index
        std::vector<int> loop_offset;

        Layout() = default;
        ~Layout();

        Layout(const Layout &) = delete;
        void operator=(const Layout &) = delete;
        Layout(Layout &&) = delete;
        void operator=(Layout &&) = delete;

        // Make a BoundContents object with this layout
        BoundContents *make() const;

        // Release a BoundContents object with this layout back to the pool
        void release(const BoundContents *b) const;
    };
};
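
// For illustration: each allocation is this header followed immediately by a
// flat array of Spans, so for a Func with R required dimensions, C computed
// dimensions, and a single stage with L loops the layout is
//
//     [BoundContents][required 0..R-1][computed 0..C-1][stage 0 loops 0..L-1]
//                     ^ data()[0]      ^ computed_offset ^ loop_offset[0]
//
// make() hands out a pooled object with this layout and release() returns it
// to the pool.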

using Bound = IntrusivePtr<const BoundContents>;

// A representation of the function DAG. The nodes and edges are both
// in reverse realization order, so if you want to walk backwards up
// the DAG, just iterate the nodes or edges in-order.
struct FunctionDAG {
    // An edge is a producer-consumer relationship
    struct Edge;

    struct SymbolicInterval {
        Halide::Var min;
        Halide::Var max;
    };

    // A Node represents a single Func
    struct Node {
        // A pointer back to the owning DAG
        FunctionDAG *dag;

        // The Halide Func this represents
        Function func;

        // The number of bytes per point stored.
        double bytes_per_point;

        // The min/max variables used to denote a symbolic region of
        // this Func. Used in the cost above, and in the Edges below.
        vector<SymbolicInterval> region_required;

        // A concrete region required from a bounds estimate. Only
        // defined for outputs.
        vector<Span> estimated_region_required;

        // The region computed of a Func, in terms of the region
        // required. For simple Funcs this is identical to the
        // region_required. However, in some Funcs computing one
        // output requires computing other outputs too. You can't
        // really ask for a single output pixel from something blurred
        // with an IIR without computing the others, for example.
        struct RegionComputedInfo {
            // The min and max in their full symbolic glory. We use
            // these in the general case.
            Interval in;

            // Analysis used to accelerate common cases
            bool equals_required = false, equals_union_of_required_with_constants = false;
            int64_t c_min = 0, c_max = 0;
        };
        vector<RegionComputedInfo> region_computed;
        bool region_computed_all_common_cases = false;

        // Expand a region required into a region computed, using the
        // symbolic intervals above.
        void required_to_computed(const Span *required, Span *computed) const;

        // Metadata about one symbolic loop in a Func's default loop nest.
        struct Loop {
            string var;
            bool pure, rvar;
            Expr min, max;

            // Which pure dimension does this loop correspond to? Invalid if it's an rvar
            int pure_dim;

            // Precomputed metadata to accelerate common cases:

            // If true, the loop bounds are just the region computed in the given dimension
            bool equals_region_computed = false;
            int region_computed_dim = 0;

            // If true, the loop bounds are a constant with the given min and max
            bool bounds_are_constant = false;
            int64_t c_min = 0, c_max = 0;

            // A persistent fragment of source for getting this Var
            // from its owner Func. Used for printing source code
            // equivalent to a computed schedule.
            string accessor;
        };

        // Get the loop nest shape as a function of the region computed
        void loop_nest_for_region(int stage_idx, const Span *computed, Span *loop) const;

        // One stage of a Func
        struct Stage {
            // The owning Node
            Node *node;

            // Which stage of the Func is this. 0 = pure.
            int index;

            // The loop nest that computes this stage, from innermost out.
            vector<Loop> loop;
            bool loop_nest_all_common_cases = false;

            // The vectorization width that will be used for
            // compute. Corresponds to the natural width for the
            // narrowest type used.
            int vector_size;

            // The featurization of the compute done
            PipelineFeatures features;

            // The actual Halide front-end stage object
            Halide::Stage stage;

            // The name for scheduling (e.g. "foo.update(3)")
            string name;
            string sanitized_name;

            // Ids for perfect hashing on stages.
            int id, max_id;

            std::unique_ptr<LoadJacobian> store_jacobian;

            vector<Edge *> incoming_edges;

            vector<bool> dependencies;
            bool downstream_of(const Node &n) const {
                return dependencies[n.id];
            };

            Stage(Halide::Stage s)
                : stage(std::move(s)) {
            }

            int get_loop_index_from_var(const std::string &var) const {
                int i = 0;
                for (const auto &l : loop) {
                    if (l.var == var) {
                        return i;
                    }

                    ++i;
                }

                return -1;
            }
        };
        vector<Stage> stages;

        vector<Edge *> outgoing_edges;

        // Max vector size across the stages
        int vector_size;

        // A unique ID for this node, allocated consecutively starting
        // at zero for each pipeline.
        int id, max_id;

        // Just func->dimensions(), but we ask for it so many times
        // that it's worth avoiding the function call into libHalide.
        int dimensions;

        // Is a single pointwise call to another Func
        bool is_wrapper;

        // We represent the input buffers as nodes, though we do not attempt to schedule them.
        bool is_input;

        // Is one of the pipeline outputs
        bool is_output;

        // Only uses pointwise calls
        bool is_pointwise;

        // Only uses pointwise calls + clamping on all indices
        bool boundary_condition;

        std::unique_ptr<BoundContents::Layout> bounds_memory_layout;

        BoundContents *make_bound() const {
            return bounds_memory_layout->make();
        }
    };

    // A representation of a producer-consumer relationship
    struct Edge {
        struct BoundInfo {
            // The symbolic expression for the bound in this dimension
            Expr expr;

            // Fields below are the results of additional analysis
            // used to evaluate this bound more quickly.
            int64_t coeff, constant;
            int64_t consumer_dim;
            bool affine, uses_max;

            BoundInfo(const Expr &e, const Node::Stage &consumer);
        };

        // Memory footprint on producer required by consumer.
        vector<pair<BoundInfo, BoundInfo>> bounds;

        FunctionDAG::Node *producer;
        FunctionDAG::Node::Stage *consumer;

        // The number of calls the consumer makes to the producer, per
        // point in the loop nest of the consumer.
        int calls;

        bool all_bounds_affine;

        vector<LoadJacobian> load_jacobians;

        bool all_load_jacobian_coeffs_exist() const;

        void add_load_jacobian(LoadJacobian j1);

        // Given a loop nest of the consumer stage, expand a region
        // required of the producer to be large enough to include all
        // points required.
        void expand_footprint(const Span *consumer_loop, Span *producer_required) const;
    };

    vector<Node> nodes;
    vector<Edge> edges;

    // We're going to be querying this DAG a lot while searching for
    // an optimal schedule, so we'll also create a variety of
    // auxiliary data structures.
    map<int, const Node *> stage_id_to_node_map;

    // Create the function DAG, and do all the dependency and cost
    // analysis. This is done once up-front before the tree search.
    FunctionDAG(const vector<Function> &outputs, const Target &target);

    void dump() const;
    std::ostream &dump(std::ostream &os) const;

    // This class uses a lot of internal pointers, so we'll hide the copy constructor.
    FunctionDAG(const FunctionDAG &other) = delete;
    void operator=(const FunctionDAG &other) = delete;

private:
    // Compute the featurization for the entire DAG
    void featurize();

    template<typename OS>
    void dump_internal(OS &os) const;
};
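
// For illustration: because nodes and edges are stored in reverse realization
// order, walking from the outputs back toward the inputs is just a forward
// iteration (e.g. "for (const auto &n : dag.nodes) {...}" visits consumers
// before the Funcs they consume).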

template<typename T>
using NodeMap = PerfectHashMap<FunctionDAG::Node, T>;

class ExprBranching : public VariadicVisitor<ExprBranching, int, int> {
private:
    const NodeMap<int64_t> &inlined;

public:
    int visit(const Reinterpret *op);
    int visit(const IntImm *op);
    int visit(const UIntImm *op);
    int visit(const FloatImm *op);
    int visit(const StringImm *op);
    int visit(const Broadcast *op);
    int visit(const Cast *op);
    int visit(const Variable *op);
    int visit(const Add *op);
    int visit(const Sub *op);
    int visit(const Mul *op);
    int visit(const Div *op);
    int visit(const Mod *op);
    int visit(const Min *op);
    int visit(const Max *op);
    int visit(const EQ *op);
    int visit(const NE *op);
    int visit(const LT *op);
    int visit(const LE *op);
    int visit(const GT *op);
    int visit(const GE *op);
    int visit(const And *op);
    int visit(const Or *op);
    int visit(const Not *op);
    int visit(const Select *op);
    int visit(const Ramp *op);
    int visit(const Load *op);
    int visit(const Call *op);
    int visit(const Shuffle *op);
    int visit(const Let *op);
    int visit(const VectorReduce *op);
    int visit_binary(const Expr &a, const Expr &b);
    int visit_nary(const std::vector<Expr> &exprs);

    explicit ExprBranching(const NodeMap<int64_t> &inlined)
        : inlined{inlined} {
    }

    int compute(const Function &f);
};
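
// For illustration, a typical use of the visitor above: construct it with the
// NodeMap of inlined Funcs and apply it to a Function's definition, e.g.
//
//     ExprBranching branching(inlined);      // inlined: a NodeMap<int64_t>
//     int branches = branching.compute(f);   // f: a Halide::Internal::Function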

void sanitize_names(std::string &str);

}  // namespace Autoscheduler
}  // namespace Internal
}  // namespace Halide

#endif  // FUNCTION_DAG_H