Go to the documentation of this file.
12 #include <unordered_set>
17 namespace Autoscheduler {
23 using std::unordered_set;
49 template<
typename PostCreateMutator>
51 new_loop_nest->
copy_from(*existing_loop_nest);
55 new_loop_nest->
children[i] = new_child;
59 post_create_mutator(new_loop_nest);
62 template<
typename PostCreateMutator>
90 const LoopNest *here,
int depth)
const;
97 template<
typename PostCreateMutator>
100 deep_copy_loop_nest<PostCreateMutator>(new_root,
nullptr,
root, post_create_mutator);
168 bool can_fuse_gpu(
const vector<int64_t> &parallel_extents)
const;
188 struct CompareStates {
190 return a->cost > b->cost;
194 std::vector<IntrusivePtr<State>> storage;
199 if (sz >= storage.size()) {
200 storage.resize(
std::max(sz * 2, (
size_t)64));
202 internal_assert(sz < storage.size()) << sz <<
" " << storage.size() <<
"\n";
203 storage[sz] = std::move(s);
205 std::push_heap(storage.begin(), storage.begin() + sz, CompareStates{});
209 internal_assert(sz <= storage.size()) << sz <<
" " << storage.size() <<
"\n";
210 std::pop_heap(storage.begin(), storage.begin() + sz, CompareStates{});
212 return std::move(storage[sz]);
228 storage.swap(other.storage);
229 std::swap(sz, other.sz);
237 std::make_heap(storage.begin(), storage.begin() + sz, CompareStates{});
241 for (
size_t i = 0; i < sz; i++) {
bool calculate_cost(const FunctionDAG &dag, const Adams2019Params &params, CostModel *cost_model, const CachingOptions &cache_options, int verbosity=99)
double get_stack_memory_adjustment_factor()
bool use_adjusted_tilings()
void operator()(LoopNest *new_loop_nest) const
#define internal_assert(c)
LoopNest * create_feature_root(const PostCreateMutator &post_create_mutator) const
IntrusivePtr< State > pop()
void save_featurization(const FunctionDAG &dag, const Adams2019Params &params, const CachingOptions &cache_options, std::ostream &out)
void emplace(IntrusivePtr< State > &&s)
constexpr int kLocalMemoryLimit
void add_to_always_consider_inline_options(const FunctionDAG::Node *node)
void apply_schedule(const FunctionDAG &dag, const Adams2019Params &params)
bool can_fuse_gpu(const vector< int64_t > ¶llel_extents) const
NodeMap< bool > always_consider_inline
IntrusivePtr< State > make_child() const
bool exceeds_serial_extents_limit(const Target &target) const
Intrusive shared pointers have a reference count (a RefCount object) stored in the class itself.
unsigned __INT64_TYPE__ uint64_t
This file defines the class FunctionDAG, which is our representation of a Halide pipeline,...
bool has_compute_root_loops_without_blocks() const
const IntrusivePtr< State > & top()
@ Internal
Not visible externally, similar to 'static' linkage in C.
void copy_from(const LoopNest &n)
int64_t get_stack_memory_limit()
IntrusivePtr< State > operator[](int idx) const
void operator=(const State &)=delete
bool contains_store_at_further_in_than_outermost() const
IntrusivePtr< const State > parent
void print_compute_locations() const
void deep_copy_loop_nest(LoopNest *new_loop_nest, const LoopNest *new_loop_nest_parent, const IntrusivePtr< const LoopNest > &existing_loop_nest, const PostCreateMutator &post_create_mutator)
bool has_dynamic_allocation_inside_thread() const
bool exceeds_local_memory_limit(const Anderson2021Params &params, const Target &target) const
signed __INT64_TYPE__ int64_t
void fuse_gpu_blocks(LoopNest::StageScheduleState *state, Stage &stage, const vector< VarOrRVar > &parallel_vars, const vector< int64_t > &parallel_extents, const vector< int > &constant_extents) const
bool mark_gpu_threads(LoopNest::StageScheduleState *state, Stage &stage, std::unordered_set< std::string > &new_serial_vars, std::ostringstream &staged_funcs_schedule_source) const
IntrusivePtr< const LoopNest > root
std::vector< double > cost_per_stage
bool is_memoize_blocks_enabled()
const Anderson2021Params & params
bool exceeds_shared_memory_limit(const Anderson2021Params &params, const Target &target) const
void set_gpu_store_site(const map< const LoopNest *, pair< const LoopNest *, int >> &parent, const LoopNest *loop, LoopNest::Sites &site) const
int64_t get_shared_mem_alloc_size(const LoopNest *block, const LoopNest *loop) const
bool has_loop_nest_without_thread_loops() const
void update_always_consider_inline_options(const FunctionDAG::Node *node)
void add_outer_thread_loops(LoopNest *loop_nest) const
void swap(StateQueue &other)
A class representing a reference count to be used with IntrusivePtr.
uint64_t structural_hash(int depth) const
void operator()(LoopNest *new_loop_nest) const
bool compute_root_and_inline_only()
void mark_gpu_blocks(LoopNest::StageScheduleState *state, Stage &stage, const vector< VarOrRVar > &parallel_vars, const vector< int64_t > &parallel_extents) const
void compute_featurization(const FunctionDAG &dag, const Adams2019Params &params, StageMap< ScheduleFeatures > *features, const CachingOptions &cache_options)
Expr max(const FuncRef &a, const FuncRef &b)
const LoopNest * deepest_valid_compute_location(const Anderson2021Params &params, const map< const LoopNest *, pair< const LoopNest *, int >> &parent, const FunctionDAG::Node &node, const LoopNest *loop, const LoopNest *root, StageMap< int64_t > &total_shared_mem_alloc_sizes) const
A single definition of a Func.
std::vector< IntrusivePtr< const LoopNest > > children
bool verify_memoized_features()
void split_compute_root_loops(LoopNest *loop_nest) const
A struct representing a target machine and os to generate code for.
bool should_always_consider_inline(const FunctionDAG::Node *node) const
int64_t total_loop_extents_of_ancestors(const map< const LoopNest *, pair< const LoopNest *, int >> &parent, const LoopNest *loop) const
void compute_loop_nest_parents(map< const LoopNest *, pair< const LoopNest *, int >> &p, const LoopNest *here, int depth) const
bool contains_store_at(const set< const FunctionDAG::Node * > &outermost_store_at, const IntrusivePtr< const LoopNest > &parent) const
const LoopNest * deepest_common_ancestor(const map< const LoopNest *, pair< const LoopNest *, int >> &parent, const LoopNest *a, const LoopNest *b) const
IntrusivePtr< const LoopNest > get_root_for_features(const Anderson2021Params &params, const Target &target) const