Halide 19.0.0
Halide compiler and libraries
Loading...
Searching...
No Matches
LoopNest.h
Go to the documentation of this file.
1/** This file defines the LoopNest, which is our
2 * representation of a Halide schedule, and contains methods to
3 * generate candidates for scheduling as well as extract a
4 * featurization that can be used to cost each candidate. */
5
6#ifndef LOOP_NEST_H
7#define LOOP_NEST_H
8
9#include "FunctionDAG.h"
10#include "PerfectHashMap.h"
11#include <map>
12#include <set>
13#include <utility>
14#include <vector>
15
16namespace Halide {
17namespace Internal {
18namespace Autoscheduler {
19
20template<typename T>
22
23template<typename T>
25
26// Given a multi-dimensional box of dimensionality d, generate a list
27// of candidate tile sizes for it, logarithmically spacing the sizes
28// using the given factor. If 'allow_splits' is false, every dimension
29// must either be one, or the full extent of the box. This function is
30// used to generate candidate tilings when tiling for
31// producer-consumer fusion, or tiling for parallelism.
32std::vector<std::vector<int64_t>> generate_tilings(const vector<int64_t> &s, int d, int factor, bool allow_splits);
33
34struct LoopNest {
36
37 // The extents of this loop. Put another way, the number of tiles,
38 // not the size of each tile.
39 std::vector<int64_t> size;
40
41 // The nodes inside the loop body
42 std::vector<IntrusivePtr<const LoopNest>> children;
43
44 // Funcs inlined into this inner loop, and the number of times
45 // each is called. Only valid if children is empty.
47
48 // Funcs stored inside this loop
49 std::set<const FunctionDAG::Node *> store_at;
50
51 // The total bounds required of any given Func over all iterations
52 // of this loop. In the paper, this is represented using the
53 // little boxes to the left of the loop nest tree figures.
55
56 // The Func this loop nest belongs to
57 const FunctionDAG::Node *node = nullptr;
58
59 // The stage of the Func
61
62 // Is this the innermost loop of this func (the SIMD loop)?
63 bool innermost = false;
64
65 // Are we permitted to tile this loop?
66 bool tileable = false;
67
68 // Is this the parallel outer loop?
69 bool parallel = false;
70
71 // What dimension is this Func vectorized over, in terms of the pure args of the Func?
72 int vector_dim = -1;
73
74 // Which loop corresponds to the innermost storage dimension and will be vectorized. -1 means none of them.
76
77 void copy_from(const LoopNest &n);
78
79 static void hash_combine(uint64_t &h, uint64_t next) {
80 // From boost
81 h ^= (next + 0x9e3779b9 + (h << 6) + (h >> 2));
82 }
83
84 // Hash the loop structure and sizes up to a fixed depth. This is
85 // used as the hash function for the coarse-to-fine beam search in
86 // the paper.
87 void structural_hash(uint64_t &h, int depth) const;
88
89 // How many funcs are scheduled inside this loop level. Used in
90 // the structural hash.
92 size_t count = inlined.size() + store_at.size();
93 for (const auto &c : children) {
94 count += c->funcs_realized_or_inlined();
95 }
96 return count;
97 }
98
99 // All of a stage's interesting locations in the loop nest. Used to help compute the featurization of a stage.
100 struct Sites {
101 const LoopNest *compute = nullptr; // Its containing compute_at site
102 const LoopNest *store = nullptr; // Its containing store_at site
103 const LoopNest *produce = nullptr; // Its own outermost node
104 const LoopNest *innermost = nullptr; // Its innermost node - usually a SIMD loop
105 const LoopNest *task = nullptr; // The parallel for loop it belongs to
106 bool inlined = false; // Is the Func inlined?
107
108 // Used for caching features/feature intermediates.
110 };
111
112 // Compute all the sites of interest for each pipeline stage
114 const LoopNest *task = nullptr,
115 const LoopNest *parent = nullptr) const;
116
117 // A helper for the working_set_at_task feature. Most features are
118 // computed in the recursive pass 'compute_features' below, but
119 // this one must be done in a second separate recursive pass.
122 for (const auto &c : children) {
123 c->set_working_set_at_task_feature(working_set, features);
124 features->get(c->stage).working_set_at_task = working_set;
125 }
126 }
127
128 // Do a recursive walk over the loop nest computing features to feed the cost model.
130 const Adams2019Params &params,
131 const StageMap<Sites> &sites,
132 int64_t instances,
133 int64_t parallelism,
134 const LoopNest *parent,
135 const LoopNest *grandparent,
136 const LoopNest &root,
137 int64_t *working_set,
139 bool use_cached_features) const;
140
141 bool is_root() const {
142 // The root is the sole node without a Func associated with
143 // it.
144 return node == nullptr;
145 }
146
147 // Set the region required of a Func at this site.
148 const Bound &set_bounds(const FunctionDAG::Node *f, BoundContents *b) const {
149 return bounds.emplace(f, b);
150 }
151
152 // Get the region required of a Func at this site, from which we
153 // know what region would be computed if it were scheduled here,
154 // and what its loop nest would be.
155 const Bound &get_bounds(const FunctionDAG::Node *f) const;
156
157 // Recursively print a loop nest representation to stderr
158 void dump(std::ostream &os, string prefix, const LoopNest *parent) const;
159
160 // Does this loop nest access the given Func
161 bool calls(const FunctionDAG::Node *f) const;
162
163 // What is the maximum number of inlined calls to a Func that
164 // occur within this loop. Used to prune states that would
165 // generate too much code.
167
168 // Does this loop nest access an input buffer? Used to select
169 // trail strategies when splitting loops. We don't want to read
170 // out of bounds on inputs, even if we don't intend to use the
171 // values read. It could create annoying assertion failures for
172 // the user. It's OK to read out of range of the values computed
173 // on internal Funcs though. Allocation bounds inference just pads
174 // out the bounds so that it won't fault.
176
177 // Does this loop nest contain a computation of the given Func.
178 bool computes(const FunctionDAG::Node *f) const;
179
180 // Above here most methods query the loop nest. Below we have
181 // methods that mutate the loop nest.
182
183 // Inline a Func into all consumers within this loop.
185
186 // Compute a Func at this site.
187 void compute_here(const FunctionDAG::Node *f, bool tileable, int v, const Adams2019Params &params);
188
189 // Parallelize this loop according to the given tiling.
191 const vector<int64_t> &tiling,
192 const LoopNest *parent) const;
193
194 // Return all possible ways to compute f in tiles somewhere within
195 // this loop nest.
196 std::vector<IntrusivePtr<const LoopNest>> compute_in_tiles(const FunctionDAG::Node *f,
197 const LoopNest *parent,
198 const Adams2019Params &params,
199 int v,
200 bool in_realization) const;
201
202 // Below here we have methods that apply a schedule to a Halide pipeline.
203
204 // A model of the state of the loop nest of a Func while applying
205 // Halide's scheduling directives.
206
207 // Note that StageScheduleState is movable-but-not-copyable thanks
208 // to its ostringstream member.
209 struct StageScheduleState {
210 // How much parallelism do we need to exploit with this Func?
211 double num_cores = 0;
212
213 // Which storage dimension is vectorized? We need to reorder it innermost
214 int vector_dim = -1;
216
217 // The various Vars and RVars used for scheduling a Func.
218 struct FuncVar {
219 // The top-level var or rvar this was split off from
221
222 // This var.
224
225 // Source code to access this Var/RVar. Used for printing
226 // valid Halide source for this schedule.
227 string accessor;
228
229 // Our estimate of the extent of this var. This is exact
230 // when constant_extent flag is true.
232
233 // Which index in the symbolic loop nest does this var
234 // belong to.
235 size_t index = 0;
236
237 // Some flags.
238 bool innermost_pure_dim = false,
239 outermost = false,
240 parallel = false,
241 exists = false,
242 pure = false,
245 : orig(Var()), var(Var()) {
246 }
247 };
248
249 // In order from innermost to outermost. Each group of d is one tiling level.
250 std::vector<FuncVar> vars;
251
252 std::ostringstream schedule_source;
253 };
254
255 // Apply the schedule represented by this loop nest to a Halide pipeline.
256 void apply(LoopLevel here,
257 StageMap<std::unique_ptr<StageScheduleState>> &state_map,
258 double num_cores,
259 int depth,
260 const LoopNest *parent,
261 const LoopNest *compute_site) const;
262
263 // The below are two feature caches.
264 // hash of producers -> StageMap
265 mutable std::map<uint64_t, StageMap<StageMap<FeatureIntermediates>>> feature_intermediates_cache;
266 // hash of producers -> StageMap
267 mutable std::map<uint64_t, StageMap<ScheduleFeatures>> features_cache;
268
269 // Same as copy_from (above) but also copies the two caches.
271
272 // Loops through inlined funcs and caches the pcm found in features, into memoized_features.
275
276 // Merges features_to_insert into memoized_features if it does not already exist there.
278 const StageMap<ScheduleFeatures> *features_to_insert) const;
279
280 // Recalculates working_set from cached features
283
284 // Features need to be recomputed for inlined Funcs
287
288 // Create a (hopefully) unique hash of the producers.
290
291 // Gather all stages that are producers for any Func in this LoopNest.
292 std::vector<std::pair<int, int>> collect_producers(const StageMap<Sites> &sites) const;
293
294 // Collect all stages referenced in this LoopNest.
295 void collect_stages(std::set<const FunctionDAG::Node::Stage *> &stages) const;
296};
297
298// Find the deepest common ancestor of `a` and `b`.
299// `parents` is a map from loop nest to (parent, depth) tuples.
300// Assumes that `a` and `b` are found in `parents`, otherwise errors.
301const LoopNest *deepest_common_ancestor(const std::map<const LoopNest *, std::pair<const LoopNest *, int>> &parents,
302 const LoopNest *a, const LoopNest *b);
303
304// Compute the parent and depth of every loop nest node.
305// Stores in `parents` the children of `here` (keys) to tuples of (here, depth).
306// Recurses on all children of `here`.
307void compute_loop_nest_parents(std::map<const LoopNest *, std::pair<const LoopNest *, int>> &parents,
308 const LoopNest *here, int depth);
309
310} // namespace Autoscheduler
311} // namespace Internal
312} // namespace Halide
313
314#endif // LOOP_NEST_H
A class representing a reference count to be used with IntrusivePtr.
A reference to a site in a Halide statement at the top of the body of a particular for loop.
Definition Schedule.h:203
A Halide variable, to be used when defining functions.
Definition Var.h:19
std::vector< std::vector< int64_t > > generate_tilings(const vector< int64_t > &s, int d, int factor, bool allow_splits)
const LoopNest * deepest_common_ancestor(const std::map< const LoopNest *, std::pair< const LoopNest *, int > > &parents, const LoopNest *a, const LoopNest *b)
void compute_loop_nest_parents(std::map< const LoopNest *, std::pair< const LoopNest *, int > > &parents, const LoopNest *here, int depth)
This file defines the class FunctionDAG, which is our representation of a Halide pipeline,...
@ Internal
Not visible externally, similar to 'static' linkage in C.
unsigned __INT64_TYPE__ uint64_t
signed __INT64_TYPE__ int64_t
IntrusivePtr< const LoopNest > parallelize_in_tiles(const Adams2019Params &params, const vector< int64_t > &tiling, const LoopNest *parent) const
std::map< uint64_t, StageMap< ScheduleFeatures > > features
Definition LoopNest.h:146
const FunctionDAG::Node * node
Definition LoopNest.h:57
void recompute_inlined_features(const StageMap< Sites > &sites, StageMap< ScheduleFeatures > *features) const
void inline_func(const FunctionDAG::Node *f)
std::vector< IntrusivePtr< const LoopNest > > compute_in_tiles(const FunctionDAG::Node *f, const LoopNest *parent, const Adams2019Params &params, int v, bool in_realization) const
void collect_stages(std::set< const FunctionDAG::Node::Stage * > &stages) const
void get_sites(StageMap< Sites > &sites, const LoopNest *task=nullptr, const LoopNest *parent=nullptr) const
const Bound & get_bounds(const FunctionDAG::Node *f) const
void compute_features(const FunctionDAG &dag, const Adams2019Params &params, const StageMap< Sites > &sites, int64_t instances, int64_t parallelism, const LoopNest *parent, const LoopNest *grandparent, const LoopNest &root, int64_t *working_set, StageMap< ScheduleFeatures > *features, bool use_cached_features) const
void memoize_features(StageMap< ScheduleFeatures > &memoized_features, const StageMap< ScheduleFeatures > *features_to_insert) const
std::map< uint64_t, StageMap< ScheduleFeatures > > features_cache
Definition LoopNest.h:267
bool computes(const FunctionDAG::Node *f) const
std::vector< IntrusivePtr< const LoopNest > > children
Definition LoopNest.h:42
void set_working_set_at_task_feature(int64_t working_set, StageMap< ScheduleFeatures > *features) const
Definition LoopNest.h:120
std::vector< std::pair< int, int > > collect_producers(const StageMap< Sites > &sites) const
const Bound & set_bounds(const FunctionDAG::Node *f, BoundContents *b) const
Definition LoopNest.h:148
void memoize_points_computed_minimum(StageMap< ScheduleFeatures > &memoized_features, const StageMap< ScheduleFeatures > *features) const
static void hash_combine(uint64_t &h, uint64_t next)
Definition LoopNest.h:79
void dump(std::ostream &os, string prefix, const LoopNest *parent) const
uint64_t compute_hash_of_producers_stored_at_root(const StageMap< Sites > &sites) const
const FunctionDAG::Node::Stage * stage
Definition LoopNest.h:60
void compute_here(const FunctionDAG::Node *f, bool tileable, int v, const Adams2019Params &params)
bool calls(const FunctionDAG::Node *f) const
void copy_from_including_features(const LoopNest &n)
void structural_hash(uint64_t &h, int depth) const
std::set< const FunctionDAG::Node * > store_at
Definition LoopNest.h:49
std::map< uint64_t, StageMap< StageMap< FeatureIntermediates > > > feature_intermediates_cache
Definition LoopNest.h:265
void apply(LoopLevel here, StageMap< std::unique_ptr< StageScheduleState > > &state_map, double num_cores, int depth, const LoopNest *parent, const LoopNest *compute_site) const
void compute_working_set_from_features(int64_t *working_set, const StageMap< ScheduleFeatures > *features) const
Intrusive shared pointers have a reference count (a RefCount object) stored in the class itself.
A class that can represent Vars or RVars.
Definition Func.h:29