Halide
LoopNest.h
Go to the documentation of this file.
1 /** This file defines the LoopNest, which is our
2  * representation of a Halide schedule, and contains methods to
3  * generate candidates for scheduling as well as extract a
4  * featurization that can be used to cost each candidate. */
5 
6 #ifndef LOOP_NEST_H
7 #define LOOP_NEST_H
8 
9 #include "FunctionDAG.h"
10 #include "PerfectHashMap.h"
11 #include <set>
12 #include <vector>
13 
14 namespace Halide {
15 namespace Internal {
16 namespace Autoscheduler {
17 
18 template<typename T>
20 
21 template<typename T>
23 
24 bool may_subtile();
25 
26 // Given a multi-dimensional box of dimensionality d, generate a list
27 // of candidate tile sizes for it, logarithmically spacing the sizes
28 // using the given factor. If 'allow_splits' is false, every dimension
29 // must either be one, or the full extent of the box. This function is
30 // used to generate candidate tilings when tiling for
31 // producer-consumer fusion, or tiling for parallelism.
32 std::vector<std::vector<int64_t>> generate_tilings(const vector<int64_t> &s, int d, int factor, bool allow_splits);
33 
34 struct LoopNest {
36 
37  // The extents of this loop. Put another way, the number of tiles,
38  // not the size of each tile.
39  std::vector<int64_t> size;
40 
41  // The nodes inside the loop body
42  std::vector<IntrusivePtr<const LoopNest>> children;
43 
44  // Funcs inlined into this inner loop, and the number of times
45  // each is called. Only valid if children is empty.
47 
48  // Funcs stored inside this loop
49  std::set<const FunctionDAG::Node *> store_at;
50 
51  // The total bounds required of any given Func over all iterations
52  // of this loop. In the paper, this is represented using the
53  // little boxes to the left of the loop nest tree figures.
55 
56  // The Func this loop nest belongs to
57  const FunctionDAG::Node *node = nullptr;
58 
59  // The stage of the Func
60  const FunctionDAG::Node::Stage *stage = nullptr;
61 
62  // Is this the innermost loop of this func (the SIMD loop)?
63  bool innermost = false;
64 
65  // Are we permitted to tile this loop?
66  bool tileable = false;
67 
68  // Is this the parallel outer loop?
69  bool parallel = false;
70 
71  // What dimension is this Func vectorized over, in terms of the pure args of the Func?
72  int vector_dim = -1;
73 
74  // Which loop corresponds to the innermost storage dimension and will be vectorized. -1 means none of them.
76 
77  void copy_from(const LoopNest &n);
78 
79  static void hash_combine(uint64_t &h, uint64_t next) {
80  // From boost
81  h ^= (next + 0x9e3779b9 + (h << 6) + (h >> 2));
82  }
83 
84  // Hash the loop structure and sizes up to a fixed depth. This is
85  // used as the hash function for the coarse-to-fine beam search in
86  // the paper.
87  void structural_hash(uint64_t &h, int depth) const;
88 
89  // How many funcs are scheduled inside this loop level. Used in
90  // the structural hash.
91  size_t funcs_realized_or_inlined() const {
92  size_t count = inlined.size() + store_at.size();
93  for (const auto &c : children) {
94  count += c->funcs_realized_or_inlined();
95  }
96  return count;
97  }
98 
99  // All of a stage's interesting locations in the loop nest. Used to help compute the featurization of a stage.
100  struct Sites {
101  const LoopNest *compute = nullptr; // Its containing compute_at site
102  const LoopNest *store = nullptr; // Its containing store_at site
103  const LoopNest *produce = nullptr; // Its own outermost node
104  const LoopNest *innermost = nullptr; // Its innermost node - usually a SIMD loop
105  const LoopNest *task = nullptr; // The parallel for loop it belongs to
106  bool inlined = false; // Is the Func inlined?
107  };
108 
109  // Compute all the sites of interest for each pipeline stage
110  void get_sites(StageMap<Sites> &sites,
111  const LoopNest *task = nullptr,
112  const LoopNest *parent = nullptr) const;
113 
114  // A helper for the working_set_at_task feature. Most features are
115  // computed in the recursive pass 'compute_features' below, but
116  // this one must be done in a second separate recursive pass.
118  StageMap<ScheduleFeatures> *features) const {
119  for (const auto &c : children) {
120  c->set_working_set_at_task_feature(working_set, features);
121  features->get(c->stage).working_set_at_task = working_set;
122  }
123  }
124 
125  // Do a recursive walk over the loop nest computing features to feed the cost model.
126  void compute_features(const FunctionDAG &dag,
127  const MachineParams &params,
128  const StageMap<Sites> &sites,
129  int64_t instances,
130  int64_t parallelism,
131  const LoopNest *parent,
132  const LoopNest *grandparent,
133  const LoopNest &root,
134  int64_t *working_set,
135  StageMap<ScheduleFeatures> *features) const;
136 
137  bool is_root() const {
138  // The root is the sole node without a Func associated with
139  // it.
140  return node == nullptr;
141  }
142 
143  // Set the region required of a Func at this site.
144  const Bound &set_bounds(const FunctionDAG::Node *f, BoundContents *b) const {
145  return bounds.emplace(f, b);
146  }
147 
148  // Get the region required of a Func at this site, from which we
149  // know what region would be computed if it were scheduled here,
150  // and what its loop nest would be.
151  const Bound &get_bounds(const FunctionDAG::Node *f) const;
152 
153  // Recursively print a loop nest representation to stderr
154  void dump(string prefix, const LoopNest *parent) const;
155 
156  // Does this loop nest access the given Func
157  bool calls(const FunctionDAG::Node *f) const;
158 
159  // What is the maximum number of inlined calls to a Func that
160  // occur within this loop. Used to prune states that would
161  // generate too much code.
162  int64_t max_inlined_calls() const;
163 
164  // Does this loop nest access an input buffer? Used to select
165  // trail strategies when splitting loops. We don't want to read
166  // out of bounds on inputs, even if we don't intend to use the
167  // values read. It could create annoying assertion failures for
168  // the user. It's OK to read out of range of the values computed
169  // on internal Funcs though. Allocation bounds inference just pads
170  // out the bounds so that it won't fault.
171  bool accesses_input_buffer() const;
172 
173  // Does this loop nest contain a computation of the given Func.
174  bool computes(const FunctionDAG::Node *f) const;
175 
176  // Above here most methods query the loop nest. Below we have
177  // methods that mutate the loop nest.
178 
179  // Inline a Func into all consumers within this loop.
180  void inline_func(const FunctionDAG::Node *f);
181 
182  // Compute a Func at this site.
183  void compute_here(const FunctionDAG::Node *f, bool tileable, int v);
184 
185  // Parallelize this loop according to the given tiling.
187  const vector<int64_t> &tiling,
188  const LoopNest *parent) const;
189 
190  // Return all possible ways to compute f in tiles somewhere within
191  // this loop nest.
192  std::vector<IntrusivePtr<const LoopNest>> compute_in_tiles(const FunctionDAG::Node *f,
193  const LoopNest *parent,
194  const MachineParams &params,
195  int v,
196  bool in_realization) const;
197 
198  // Below here we have methods that apply a schedule to a Halide pipeline.
199 
200  // A model of the state of the loop nest of a Func while applying
201  // Halide's scheduling directives.
202 
203  // Note that StageScheduleState is movable-but-not-copyable thanks
204  // to its ostringstream member.
206  // How much parallelism do we need to exploit with this Func?
207  double num_cores = 0;
208 
209  // Which storage dimension is vectorized? We need to reorder it innermost
210  int vector_dim = -1;
212 
213  // The various Vars and RVars used for scheduling a Func.
214  struct FuncVar {
215  // The top-level var or rvar this was split off from
217 
218  // This var.
220 
221  // Source code to access this Var/RVar. Used for printing
222  // valid Halide source for this schedule.
223  string accessor;
224 
225  // Our estimate of the extent of this var. This is exact
226  // when constant_extent flag is true.
228 
229  // Which index in the symbolic loop nest does this var
230  // belong to.
231  size_t index = 0;
232 
233  // Some flags.
234  bool innermost_pure_dim = false,
235  outermost = false,
236  parallel = false,
237  exists = false,
238  pure = false,
241  : orig(Var()), var(Var()) {
242  }
243  };
244 
245  // In order from innermost to outermost. Each group of d is one tiling level.
246  std::vector<FuncVar> vars;
247 
248  std::ostringstream schedule_source;
249  };
250 
251  // Apply the schedule represented by this loop nest to a Halide pipeline.
252  void apply(LoopLevel here,
253  StageMap<std::unique_ptr<StageScheduleState>> &state_map,
254  double num_cores,
255  int depth,
256  const LoopNest *parent,
257  const LoopNest *compute_site) const;
258 };
259 
260 } // namespace Autoscheduler
261 } // namespace Internal
262 } // namespace Halide
263 
264 #endif // LOOP_NEST_H
Halide::MachineParams
A struct representing the machine parameters to generate the auto-scheduled code for.
Definition: Pipeline.h:31
Halide::Internal::Autoscheduler::LoopNest::Sites::task
const LoopNest * task
Definition: LoopNest.h:105
Halide::Internal::Autoscheduler::LoopNest::StageScheduleState
Definition: LoopNest.h:205
Halide::Internal::Autoscheduler::LoopNest::StageScheduleState::FuncVar::accessor
string accessor
Definition: LoopNest.h:223
Halide::Internal::Autoscheduler::BoundContents
Definition: FunctionDAG.h:253
Halide::Internal::Autoscheduler::LoopNest::ref_count
RefCount ref_count
Definition: LoopNest.h:35
Halide::Var
A Halide variable, to be used when defining functions.
Definition: Var.h:19
Halide::Internal::Autoscheduler::LoopNest::vectorized_loop_index
int vectorized_loop_index
Definition: LoopNest.h:75
Halide::Internal::Autoscheduler::LoopNest::Sites::compute
const LoopNest * compute
Definition: LoopNest.h:101
Halide::Internal::Autoscheduler::LoopNest::StageScheduleState::FuncVar::FuncVar
FuncVar()
Definition: LoopNest.h:240
Halide::Internal::Autoscheduler::may_subtile
bool may_subtile()
Halide::Internal::Autoscheduler::LoopNest::StageScheduleState::FuncVar::var
VarOrRVar var
Definition: LoopNest.h:219
Halide::Internal::Autoscheduler::LoopNest::set_bounds
const Bound & set_bounds(const FunctionDAG::Node *f, BoundContents *b) const
Definition: LoopNest.h:144
Halide::Internal::Autoscheduler::LoopNest::StageScheduleState::FuncVar::extent
int64_t extent
Definition: LoopNest.h:227
Halide::Internal::Autoscheduler::FunctionDAG
Definition: FunctionDAG.h:346
Halide::Internal::Autoscheduler::LoopNest::Sites::innermost
const LoopNest * innermost
Definition: LoopNest.h:104
Halide::Internal::Autoscheduler::LoopNest::get_bounds
const Bound & get_bounds(const FunctionDAG::Node *f) const
Halide::Internal::Autoscheduler::LoopNest::node
const FunctionDAG::Node * node
Definition: LoopNest.h:57
Halide::Internal::Autoscheduler::LoopNest::StageScheduleState::FuncVar::index
size_t index
Definition: LoopNest.h:231
Halide::Internal::Autoscheduler::LoopNest::set_working_set_at_task_feature
void set_working_set_at_task_feature(int64_t working_set, StageMap< ScheduleFeatures > *features) const
Definition: LoopNest.h:117
Halide::Internal::Autoscheduler::LoopNest::parallel
bool parallel
Definition: LoopNest.h:69
Halide::Internal::Autoscheduler::LoopNest::accesses_input_buffer
bool accesses_input_buffer() const
Halide::Internal::Autoscheduler::LoopNest::Sites::store
const LoopNest * store
Definition: LoopNest.h:102
PerfectHashMap::get
const T & get(const K *n) const
Definition: PerfectHashMap.h:258
Halide::Internal::IntrusivePtr
Intrusive shared pointers have a reference count (a RefCount object) stored in the class itself.
Definition: IntrusivePtr.h:68
Halide::Internal::Autoscheduler::LoopNest::calls
bool calls(const FunctionDAG::Node *f) const
Halide::Internal::Autoscheduler::LoopNest::max_inlined_calls
int64_t max_inlined_calls() const
uint64_t
unsigned __INT64_TYPE__ uint64_t
Definition: runtime_internal.h:19
Halide::Internal::Autoscheduler::LoopNest::parallelize_in_tiles
IntrusivePtr< const LoopNest > parallelize_in_tiles(const MachineParams &params, const vector< int64_t > &tiling, const LoopNest *parent) const
FunctionDAG.h
Halide::Internal::Autoscheduler::LoopNest::innermost
bool innermost
Definition: LoopNest.h:63
Halide
This file defines the class FunctionDAG, which is our representation of a Halide pipeline,...
Definition: AddAtomicMutex.h:21
PerfectHashMap.h
Halide::LinkageType::Internal
@ Internal
Not visible externally, similar to 'static' linkage in C.
Halide::Internal::Autoscheduler::LoopNest::copy_from
void copy_from(const LoopNest &n)
Halide::Internal::Autoscheduler::LoopNest::inlined
NodeMap< int64_t > inlined
Definition: LoopNest.h:46
Halide::Internal::Autoscheduler::LoopNest::bounds
NodeMap< Bound > bounds
Definition: LoopNest.h:54
Halide::Internal::Autoscheduler::LoopNest::computes
bool computes(const FunctionDAG::Node *f) const
Halide::Internal::Autoscheduler::LoopNest
Definition: LoopNest.h:34
Halide::Internal::Autoscheduler::LoopNest::get_sites
void get_sites(StageMap< Sites > &sites, const LoopNest *task=nullptr, const LoopNest *parent=nullptr) const
Halide::Internal::Autoscheduler::LoopNest::apply
void apply(LoopLevel here, StageMap< std::unique_ptr< StageScheduleState >> &state_map, double num_cores, int depth, const LoopNest *parent, const LoopNest *compute_site) const
Halide::Internal::Autoscheduler::LoopNest::StageScheduleState::FuncVar::parallel
bool parallel
Definition: LoopNest.h:236
int64_t
signed __INT64_TYPE__ int64_t
Definition: runtime_internal.h:18
Halide::Internal::Autoscheduler::LoopNest::StageScheduleState::vectorized_loop_index
int vectorized_loop_index
Definition: LoopNest.h:211
Halide::Internal::Autoscheduler::LoopNest::size
std::vector< int64_t > size
Definition: LoopNest.h:39
Halide::Internal::Autoscheduler::FunctionDAG::Node::Stage
Definition: FunctionDAG.h:426
Halide::Internal::Autoscheduler::LoopNest::store_at
std::set< const FunctionDAG::Node * > store_at
Definition: LoopNest.h:49
Halide::Internal::Autoscheduler::LoopNest::StageScheduleState::FuncVar::pure
bool pure
Definition: LoopNest.h:238
Halide::Internal::Autoscheduler::LoopNest::Sites::produce
const LoopNest * produce
Definition: LoopNest.h:103
Halide::Internal::Autoscheduler::LoopNest::Sites::inlined
bool inlined
Definition: LoopNest.h:106
Halide::Internal::Autoscheduler::LoopNest::hash_combine
static void hash_combine(uint64_t &h, uint64_t next)
Definition: LoopNest.h:79
Halide::Internal::Autoscheduler::LoopNest::stage
const FunctionDAG::Node::Stage * stage
Definition: LoopNest.h:60
Halide::Internal::Autoscheduler::generate_tilings
std::vector< std::vector< int64_t > > generate_tilings(const vector< int64_t > &s, int d, int factor, bool allow_splits)
Halide::Internal::Autoscheduler::LoopNest::Sites
Definition: LoopNest.h:100
Halide::Internal::Autoscheduler::LoopNest::dump
void dump(string prefix, const LoopNest *parent) const
Halide::Internal::Autoscheduler::LoopNest::compute_in_tiles
std::vector< IntrusivePtr< const LoopNest > > compute_in_tiles(const FunctionDAG::Node *f, const LoopNest *parent, const MachineParams &params, int v, bool in_realization) const
Halide::Internal::Autoscheduler::LoopNest::StageScheduleState::num_cores
double num_cores
Definition: LoopNest.h:207
Halide::LoopLevel
A reference to a site in a Halide statement at the top of the body of a particular for loop.
Definition: Schedule.h:143
Halide::Internal::Autoscheduler::LoopNest::StageScheduleState::vector_dim
int vector_dim
Definition: LoopNest.h:210
Halide::Internal::Autoscheduler::LoopNest::StageScheduleState::FuncVar::innermost_pure_dim
bool innermost_pure_dim
Definition: LoopNest.h:234
PerfectHashMap
Definition: PerfectHashMap.h:38
Halide::Internal::Autoscheduler::LoopNest::compute_features
void compute_features(const FunctionDAG &dag, const MachineParams &params, const StageMap< Sites > &sites, int64_t instances, int64_t parallelism, const LoopNest *parent, const LoopNest *grandparent, const LoopNest &root, int64_t *working_set, StageMap< ScheduleFeatures > *features) const
Halide::Internal::Autoscheduler::LoopNest::StageScheduleState::FuncVar::constant_extent
bool constant_extent
Definition: LoopNest.h:239
Halide::Internal::RefCount
A class representing a reference count to be used with IntrusivePtr.
Definition: IntrusivePtr.h:19
Halide::Internal::Autoscheduler::LoopNest::structural_hash
void structural_hash(uint64_t &h, int depth) const
Halide::Internal::Autoscheduler::LoopNest::funcs_realized_or_inlined
size_t funcs_realized_or_inlined() const
Definition: LoopNest.h:91
Halide::Internal::Autoscheduler::LoopNest::tileable
bool tileable
Definition: LoopNest.h:66
Halide::VarOrRVar
A class that can represent Vars or RVars.
Definition: Func.h:30
Halide::Internal::Autoscheduler::LoopNest::StageScheduleState::schedule_source
std::ostringstream schedule_source
Definition: LoopNest.h:248
Halide::Internal::Autoscheduler::FunctionDAG::Node
Definition: FunctionDAG.h:357
Halide::Internal::Autoscheduler::LoopNest::inline_func
void inline_func(const FunctionDAG::Node *f)
Halide::Internal::Autoscheduler::LoopNest::StageScheduleState::FuncVar::exists
bool exists
Definition: LoopNest.h:237
Halide::Internal::Autoscheduler::LoopNest::children
std::vector< IntrusivePtr< const LoopNest > > children
Definition: LoopNest.h:42
Halide::Internal::Autoscheduler::LoopNest::StageScheduleState::FuncVar::outermost
bool outermost
Definition: LoopNest.h:235
Halide::Internal::Autoscheduler::LoopNest::StageScheduleState::FuncVar::orig
VarOrRVar orig
Definition: LoopNest.h:216
Halide::Internal::Autoscheduler::LoopNest::compute_here
void compute_here(const FunctionDAG::Node *f, bool tileable, int v)
Halide::Internal::Autoscheduler::LoopNest::is_root
bool is_root() const
Definition: LoopNest.h:137
Halide::Internal::Autoscheduler::LoopNest::vector_dim
int vector_dim
Definition: LoopNest.h:72
Halide::Internal::Autoscheduler::LoopNest::StageScheduleState::vars
std::vector< FuncVar > vars
Definition: LoopNest.h:246
Halide::Internal::Autoscheduler::LoopNest::StageScheduleState::FuncVar
Definition: LoopNest.h:214