#include <unordered_set>

namespace Autoscheduler {

using std::unordered_set;

// Search-wide knobs and limits consulted when evaluating states.
bool use_adjusted_tilings();
bool compute_root_and_inline_only();
bool verify_memoized_features();
bool is_memoize_blocks_enabled();
int64_t get_stack_memory_limit();
double get_stack_memory_adjustment_factor();

// Maximum size of a GPU thread-local allocation, in bytes.
constexpr int kLocalMemoryLimit = 524288;  // 512 KB
// Recursively copy an existing loop nest into new_loop_nest, then apply
// post_create_mutator to every newly created node (children first, then the
// node itself).
template<typename PostCreateMutator>
void deep_copy_loop_nest(LoopNest *new_loop_nest,
                         const LoopNest *new_loop_nest_parent,
                         const IntrusivePtr<const LoopNest> &existing_loop_nest,
                         const PostCreateMutator &post_create_mutator) {
    new_loop_nest->copy_from(*existing_loop_nest);

    for (std::size_t i = 0, N = new_loop_nest->children.size(); i < N; ++i) {
        LoopNest *new_child = new LoopNest;
        new_loop_nest->children[i] = new_child;
        deep_copy_loop_nest(new_child, new_loop_nest, existing_loop_nest->children[i], post_create_mutator);
    }

    post_create_mutator(new_loop_nest);
}
// Maps a loop nest to its parent loop nest and its depth from the root.
using LoopNestMap = map<const LoopNest *, pair<const LoopNest *, int>>;
// Convenience overload: deep-copy the whole loop nest rooted at loop_nest and
// return the new root.
template<typename PostCreateMutator>
LoopNest *deep_copy_loop_nest(const IntrusivePtr<const LoopNest> &loop_nest,
                              const PostCreateMutator &post_create_mutator) {
    LoopNest *new_loop_nest = new LoopNest;
    deep_copy_loop_nest(new_loop_nest, nullptr, loop_nest, post_create_mutator);
    return new_loop_nest;
}
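
// Usage sketch (illustrative; not part of the original header). Any callable
// accepting a LoopNest * can serve as the PostCreateMutator; the search uses
// mutators that rewrite freshly copied loop nests (see
// State::FeatureLoopNestMutator below). The no-op mutator and helper here are
// hypothetical examples that assume only the templates above.
struct NoOpMutator {
    // Called once per copied node, children before parents.
    void operator()(LoopNest *) const {
    }
};

// Deep-copies a loop nest without modifying the copy.
inline LoopNest *clone_loop_nest(const IntrusivePtr<const LoopNest> &root) {
    return deep_copy_loop_nest(root, NoOpMutator{});
}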
struct State {
    mutable RefCount ref_count;
    IntrusivePtr<const LoopNest> root;
    IntrusivePtr<const State> parent;
    double cost = 0;
    std::vector<double> cost_per_stage;
    NodeMap<bool> always_consider_inline;

    State() = default;
    State(const State &) = delete;
    State(State &&) = delete;
    void operator=(const State &) = delete;
    void operator=(State &&) = delete;

    uint64_t structural_hash(int depth) const;

    // Compute the parent and depth of every loop nest node.
    void compute_loop_nest_parents(LoopNestMap &p, const LoopNest *here, int depth) const;

    const LoopNest *deepest_common_ancestor(const LoopNestMap &parent,
                                            const LoopNest *a, const LoopNest *b) const;

    const LoopNest *deepest_valid_compute_location(const Anderson2021Params &params,
                                                   const LoopNestMap &parent,
                                                   const FunctionDAG::Node &node,
                                                   const LoopNest *loop, const LoopNest *root,
                                                   StageMap<int64_t> &total_shared_mem_alloc_sizes) const;

    int64_t total_loop_extents_of_ancestors(const LoopNestMap &parent, const LoopNest *loop) const;

    // Create a copy of the loop nest suitable for featurization.
    template<typename PostCreateMutator>
    LoopNest *create_feature_root(const PostCreateMutator &post_create_mutator) const;

    struct FeatureLoopNestMutator {
        const Anderson2021Params &params;
        const Target &target;

        void operator()(LoopNest *new_loop_nest) const;

        // Split any compute_root loop into blocks, threads, and serial loops
        // so that the cost model can featurize it meaningfully.
        void split_compute_root_loops(LoopNest *loop_nest) const;

        // If a loop nest lacks thread loops, add outer thread loops.
        void add_outer_thread_loops(LoopNest *loop_nest) const;
    };

    IntrusivePtr<const LoopNest> get_root_for_features(const Anderson2021Params &params,
                                                       const Target &target) const;

    void set_gpu_store_site(const LoopNestMap &parent, const LoopNest *loop,
                            LoopNest::Sites &site) const;

    int64_t get_shared_mem_alloc_size(const LoopNest *block, const LoopNest *loop) const;

    bool exceeds_shared_memory_limit(const Anderson2021Params &params, const Target &target) const;
    bool exceeds_local_memory_limit(const Anderson2021Params &params, const Target &target) const;
    bool exceeds_serial_extents_limit(const Target &target) const;

    bool has_dynamic_allocation_inside_thread() const;
    bool has_compute_root_loops_without_blocks() const;
    bool has_loop_nest_without_thread_loops() const;
    bool contains_store_at(const set<const FunctionDAG::Node *> &outermost_store_at,
                           const IntrusivePtr<const LoopNest> &parent) const;
    bool contains_store_at_further_in_than_outermost() const;

    bool compute_featurization(const FunctionDAG &dag, const Anderson2021Params &params,
                               const Target &target, StageMap<ScheduleFeatures> *features,
                               Statistics &stats, bool verbose = false) const;

    void save_featurization(const FunctionDAG &dag, const Anderson2021Params &params,
                            const Target &target, std::ostream &out) const;

    bool calculate_cost(const FunctionDAG &dag, const Anderson2021Params &params,
                        const Target &target, CostModel *cost_model, Statistics &stats,
                        bool verbose = false);

    // Make a child copy of this state; the loop nest is shared (const) until mutated.
    IntrusivePtr<State> make_child() const;

    void print_compute_locations() const;

    void fuse_gpu_blocks(LoopNest::StageScheduleState *state, Stage &stage,
                         const vector<VarOrRVar> &parallel_vars,
                         const vector<int64_t> &parallel_extents,
                         const vector<int> &constant_extents) const;

    void mark_gpu_blocks(LoopNest::StageScheduleState *state, Stage &stage,
                         const vector<VarOrRVar> &parallel_vars,
                         const vector<int64_t> &parallel_extents) const;

    bool mark_gpu_threads(LoopNest::StageScheduleState *state, Stage &stage,
                          std::unordered_set<std::string> &new_serial_vars,
                          std::ostringstream &staged_funcs_schedule_source) const;

    bool can_fuse_gpu(const vector<int64_t> &parallel_extents) const;

    // Apply the schedule represented by this state to the Halide pipeline.
    void apply_schedule(const FunctionDAG &dag, const Anderson2021Params &params, const Target &target);

    bool should_always_consider_inline(const FunctionDAG::Node *node) const;
    void add_to_always_consider_inline_options(const FunctionDAG::Node *node);
    void update_always_consider_inline_options(const FunctionDAG::Node *node);
};
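
// Illustrative sketch (not part of the original header): a plausible order of
// the validity checks and costing calls above when evaluating a candidate
// state. The helper name evaluate_candidate is invented for illustration.
inline bool evaluate_candidate(State *s, const FunctionDAG &dag,
                               const Anderson2021Params &params, const Target &target,
                               CostModel *cost_model, Statistics &stats) {
    // Prune schedules that cannot fit within the GPU's memory or extent limits.
    if (s->exceeds_shared_memory_limit(params, target) ||
        s->exceeds_local_memory_limit(params, target) ||
        s->exceeds_serial_extents_limit(target)) {
        return false;
    }
    // Featurize the loop nest and ask the cost model for a prediction.
    return s->calculate_cost(dag, params, target, cost_model, stats);
}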
// A priority queue of candidate states, ordered so that the lowest-cost state
// is popped first.
class StateQueue {
private:
    struct CompareStates {
        bool operator()(const IntrusivePtr<State> &a, const IntrusivePtr<State> &b) const {
            return a->cost > b->cost;
        }
    };

    std::vector<IntrusivePtr<State>> storage;
    size_t sz = 0;

public:
    void emplace(IntrusivePtr<State> &&s) {
        if (sz >= storage.size()) {
            storage.resize(std::max(sz * 2, (size_t)64));
        }
        internal_assert(sz < storage.size()) << sz << " " << storage.size() << "\n";
        storage[sz] = std::move(s);
        sz++;
        std::push_heap(storage.begin(), storage.begin() + sz, CompareStates{});
    }

    IntrusivePtr<State> pop() {
        internal_assert(sz <= storage.size()) << sz << " " << storage.size() << "\n";
        std::pop_heap(storage.begin(), storage.begin() + sz, CompareStates{});
        sz--;
        return std::move(storage[sz]);
    }

    const IntrusivePtr<State> &top() {
        return storage[0];
    }

    bool empty() const {
        return sz == 0;
    }

    size_t size() const {
        return sz;
    }

    void swap(StateQueue &other) noexcept {
        storage.swap(other.storage);
        std::swap(sz, other.sz);
    }

    IntrusivePtr<State> operator[](int idx) const {
        return storage[idx];
    }

    void resort() {
        std::make_heap(storage.begin(), storage.begin() + sz, CompareStates{});
    }

    void clear() {
        for (size_t i = 0; i < sz; i++) {
            storage[i] = IntrusivePtr<State>{};
        }
        sz = 0;
    }
};
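
// Usage sketch (illustrative; not part of the original header). StateQueue is
// a min-cost priority queue over states, so a beam-search-style loop can pop
// the cheapest state, expand it, and stage the children for the next round.
// The helper name and the expansion step are hypothetical.
inline void advance_beam_example(StateQueue &q) {
    StateQueue next;
    while (!q.empty()) {
        IntrusivePtr<State> best = q.pop();  // lowest-cost state comes off first
        IntrusivePtr<State> child = best->make_child();
        // ... apply a scheduling decision to `child` and cost it here ...
        next.emplace(std::move(child));
    }
    q.swap(next);  // reuse the heap storage for the next beam
}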