Halide 19.0.0
Halide compiler and libraries
|
#include <LoopNest.h>
Classes | |
struct | FeatureIntermediates |
struct | Sites |
struct | StageScheduleState |
Public Member Functions | |
void | copy_from (const LoopNest &n) |
void | structural_hash (uint64_t &h, int depth) const |
size_t | funcs_realized_or_inlined () const |
void | get_sites (StageMap< Sites > &sites, const LoopNest *task=nullptr, const LoopNest *parent=nullptr) const |
void | set_working_set_at_task_feature (int64_t working_set, StageMap< ScheduleFeatures > *features) const |
void | compute_features (const FunctionDAG &dag, const Adams2019Params &params, const StageMap< Sites > &sites, int64_t instances, int64_t parallelism, const LoopNest *parent, const LoopNest *grandparent, const LoopNest &root, int64_t *working_set, StageMap< ScheduleFeatures > *features, bool use_cached_features) const |
bool | is_root () const |
const Bound & | set_bounds (const FunctionDAG::Node *f, BoundContents *b) const |
const Bound & | get_bounds (const FunctionDAG::Node *f) const |
void | dump (std::ostream &os, string prefix, const LoopNest *parent) const |
bool | calls (const FunctionDAG::Node *f) const |
int64_t | max_inlined_calls () const |
bool | accesses_input_buffer () const |
bool | computes (const FunctionDAG::Node *f) const |
void | inline_func (const FunctionDAG::Node *f) |
void | compute_here (const FunctionDAG::Node *f, bool tileable, int v, const Adams2019Params &params) |
IntrusivePtr< const LoopNest > | parallelize_in_tiles (const Adams2019Params &params, const vector< int64_t > &tiling, const LoopNest *parent) const |
std::vector< IntrusivePtr< const LoopNest > > | compute_in_tiles (const FunctionDAG::Node *f, const LoopNest *parent, const Adams2019Params &params, int v, bool in_realization) const |
void | apply (LoopLevel here, StageMap< std::unique_ptr< StageScheduleState > > &state_map, double num_cores, int depth, const LoopNest *parent, const LoopNest *compute_site) const |
void | copy_from_including_features (const LoopNest &n) |
void | memoize_points_computed_minimum (StageMap< ScheduleFeatures > &memoized_features, const StageMap< ScheduleFeatures > *features) const |
void | memoize_features (StageMap< ScheduleFeatures > &memoized_features, const StageMap< ScheduleFeatures > *features_to_insert) const |
void | compute_working_set_from_features (int64_t *working_set, const StageMap< ScheduleFeatures > *features) const |
void | recompute_inlined_features (const StageMap< Sites > &sites, StageMap< ScheduleFeatures > *features) const |
uint64_t | compute_hash_of_producers_stored_at_root (const StageMap< Sites > &sites) const |
std::vector< std::pair< int, int > > | collect_producers (const StageMap< Sites > &sites) const |
void | collect_stages (std::set< const FunctionDAG::Node::Stage * > &stages) const |
bool | is_gpu_serial (const Target &target) const |
bool | is_gpu_thread (const Target &target) const |
bool | is_gpu_block (const Target &target) const |
bool | is_scalar () const |
vector< int64_t > | get_union_thread_counts (const FunctionDAG::Node *f) const |
void | get_stage_sizes (const FunctionDAG::Node *f, vector< vector< int64_t > > &stage_sizes, vector< vector< int > > &pure_dims, vector< int > &vectorized_indices) const |
void | generate_vec_dim_serial_tilings (vector< int > &serial_sizes) const |
bool | add_gpu_thread_tilings (const FunctionDAG::Node *f, const Anderson2021Params &params, const Target &target, int v, vector< IntrusivePtr< const LoopNest > > &result, const vector< int64_t > &max_size) |
void | copy_from (const LoopNest &n) |
void | copy_from_including_features (const LoopNest &n) |
void | structural_hash (uint64_t &h, int depth) const |
size_t | funcs_realized_or_inlined () const |
GPUMemoryType | get_gpu_memory_type (bool in_block, bool in_thread, bool is_inlined=false) const |
std::vector< int > | unrolled_loops (const Target &target, const LoopNest *parent, const LoopNest *grandparent) const |
void | get_allocs_that_can_be_promoted_to_registers (const Target &target, StageMap< Sites > &sites, NodeMap< bool > &can_be_promoted_to_registers, const LoopNest *grandparent, const LoopNest *parent) const |
bool | promote_allocs_to_registers (const Target &target, StageMap< Sites > &sites) const |
void | get_sites (const Target &target, StageMap< Sites > &sites, StageMap< int64_t > &shared_mem_alloc_sizes, const LoopNest *task=nullptr, const LoopNest *parent=nullptr, const LoopNest *current_thread_loop=nullptr) const |
void | set_working_set_at_task_feature (int64_t working_set, StageMap< ScheduleFeatures > *features) const |
bool | exceeds_serial_extents_limit (const Target &target, const LoopNest *parent, bool in_threads_loop) const |
bool | node_has_dynamic_region_computed (const FunctionDAG::Node *f) const |
bool | has_dynamic_allocation_inside_thread (bool in_thread_loop) const |
const LoopNest * | find_pure_stage_loop_nest (const FunctionDAG::Node *node) const |
int | get_pure_stage_vectorized_loop_index (const FunctionDAG::Node *node) const |
int | get_vectorized_loop_index_from_pure_stage (const LoopNest &root) const |
double | storage_stride (const LoadJacobian &jac, int innermost_storage_dim, const FunctionDAG::Node *storage_node, const Bound &store_bounds, const LoopNest &root) const |
Strides | compute_strides (const LoadJacobian &jac, int innermost_storage_dim, const FunctionDAG::Node *storage_node, const Bound &store_bounds, const ThreadInfo *thread_info, bool verbose=false) const |
bool | all_strides_exist (const LoadJacobian &jac, const FunctionDAG::Node *storage_node, const LoopNest &root) const |
int | get_actual_vector_dim (const Bound &store_bounds) const |
void | compute_gpu_store_features (const LoadJacobian &jac, int consumer_innermost_dim, const FunctionDAG::Node *node, const Bound &consumer_store_bounds, const GPULoopInfo &gpu_loop_info, const std::vector< int64_t > &inner_serial_loop_extents, const Sites &consumer_site, ScheduleFeatures &feat, const LoopNest *parent, const LoopNest &root, GlobalMemInfo &global_mem_loads, SharedMemInfo &shared_mem_loads, LocalMemInfo &local_mem_loads, bool verbose=false) const |
bool | can_vectorize_access_for_innermost_dim (const LoadJacobian &jac, const FunctionDAG::Node *accessed, int innermost_dim, int loop_index) const |
bool | can_vectorize_store_access (const LoadJacobian &jac, const FunctionDAG::Node *accessed, bool accessed_has_been_scheduled, int innermost_dim, int loop_index, const GPUMemoryType &mem_type) const |
int | vectorized_load_access_size (const LoadJacobian &jac, const FunctionDAG::Node *accessed, bool accessed_has_been_scheduled, int innermost_dim, const GPUMemoryType &mem_type, bool verbose=false) const |
int | vectorized_access_size (size_t loop_index, bool verbose=false) const |
template<typename T > | |
void | compute_num_mem_accesses_per_block (const LoadJacobian &jac, const FunctionDAG::Node *node, const Bound &store_bounds, const ThreadInfo *thread_info, int innermost_dim, double num_requests_per_warp, MemInfoType< T > &mem_info, bool verbose=false) const |
std::pair< double, double > | compute_local_mem_store_features (const LoadJacobian &jac, int consumer_innermost_dim, const FunctionDAG::Node *node, const Bound &consumer_store_bounds, const LoopNest &root, double serial_loop_extents) const |
template<typename T > | |
MemInfoType< T > | compute_mem_store_info (const LoadJacobian &jac, int consumer_innermost_dim, const FunctionDAG::Node *node, const Bound &consumer_store_bounds, const ThreadInfo *thread_info, double serial_loop_extents, bool verbose) const |
template<typename T > | |
void | compute_mem_load_features (const LoadJacobian &jac, int producer_innermost_dim, const FunctionDAG::Node *node, const Bound &producer_store_bounds, bool producer_has_been_scheduled, const ThreadInfo *thread_info, MemInfoType< T > &mem_info, double serial_loop_extents, bool verbose=false) const |
double | compute_local_mem_stride (double stride, double bytes) const |
const LoopNest * | get_enclosing_block (const LoopNest *parent, const LoopNest *grandparent) const |
std::pair< int64_t, int64_t > | get_block_and_serial_extents (const LoopNest *block) const |
bool | all_paths_to_leaves_have_thread_loop () const |
bool | has_thread_loop_descendant () const |
void | compute_warp_features (ScheduleFeatures &features, const GPULoopInfo &gpu_loop_info) const |
void | compute_warp_and_block_occupancy (const Anderson2021Params &params, ScheduleFeatures &feat, const GPULoopInfo &gpu_loop_info) const |
void | compute_shared_mem_occupancy (const Anderson2021Params &params, const Target &target, int64_t total_shared_mem_alloc_size, ScheduleFeatures &feat) const |
std::pair< const LoopNest *, const LoopNest * > | find_innermost_and_parent () const |
int64_t | points_accessed_per_thread (const Anderson2021Params &params, const Target &target, const GPULoopInfo &gpu_loop_info, const std::vector< const FunctionDAG::Edge * > &edge_chain, const LoadJacobian &jac, const LoopNest *parent, const LoopNest *grandparent, int64_t n, const ScheduleFeatures &feat, const LoadJacobian &serial_jac, bool producer_has_been_scheduled, int producer_innermost_dim, const GPUMemoryType &mem_type, bool verbose) const |
int64_t | compute_licm_amortization (const LoopNest *innermost, const LoopNest *parent, const ScheduleFeatures &feat, const LoadJacobian &jac, int producer_dims) const |
void | memoize_points_computed_minimum (StageMap< ScheduleFeatures > &memoized_features, const StageMap< ScheduleFeatures > *features) const |
vector< pair< int, int > > | collect_producers (const StageMap< Sites > &sites) const |
uint64_t | compute_hash_of_producers_stored_at_root (const StageMap< Sites > &sites) const |
void | collect_stages (std::set< const FunctionDAG::Node::Stage * > &stages) const |
void | memoize_features (StageMap< ScheduleFeatures > &memoized_features, const StageMap< ScheduleFeatures > *features) const |
void | compute_working_set_from_features (int64_t *working_set, const StageMap< ScheduleFeatures > *features) const |
void | recompute_inlined_features (const StageMap< Sites > &sites, StageMap< ScheduleFeatures > *features) const |
std::pair< int64_t, bool > | compute_alloc_size_of_node_here (const FunctionDAG::Node *f) const |
void | compute_features (const FunctionDAG &dag, const Anderson2021Params &params, const Target &target, const StageMap< Sites > &sites, int64_t instances, int64_t parallelism, const LoopNest *parent, const LoopNest *grandparent, const LoopNest &root, GPULoopInfo gpu_loop_info, bool use_memoized_features, const StageMap< int64_t > &total_shared_mem_alloc_sizes, int64_t *working_set, int64_t *working_set_local_constant, int64_t *working_set_local_dynamic, StageMap< ScheduleFeatures > *features, Statistics &stats, bool verbose=false) const |
bool | is_root () const |
const Bound & | set_bounds (const FunctionDAG::Node *f, BoundContents *b) const |
const Bound & | get_bounds (const FunctionDAG::Node *f) const |
Bound | get_bounds_along_edge_chain (const FunctionDAG::Node *f, const vector< const FunctionDAG::Edge * > &edge_chain) const |
void | dump () const |
std::string | to_string () const |
template<typename T > | |
void | dump (T &stream, string prefix, const LoopNest *parent) const |
bool | calls (const FunctionDAG::Node *f) const |
int64_t | max_inlined_calls () const |
bool | accesses_input_buffer () const |
bool | computes (const FunctionDAG::Node *f) const |
void | inline_func (const FunctionDAG::Node *f) |
bool | compute_here (const FunctionDAG::Node *f, bool tileable, int v, bool in_threads_loop, const Anderson2021Params &params, const Target &target) |
IntrusivePtr< const LoopNest > | parallelize_in_tiles (const vector< int64_t > &tiling, const LoopNest *parent, const Anderson2021Params &params, const Target &target, bool inner_tiling, bool adjust_tiling, bool move_all_rvars_inward=true, const vector< int > &rvars_to_move_inward={}) const |
int64_t | get_total_local_mem_alloc_size (bool constant_allocs_only=false, bool in_threads_loop=false) const |
int64_t | get_total_constant_local_mem_alloc_size () const |
bool | requires_dynamic_allocation (const FunctionDAG::Node *f, const Target &target, bool in_threads_loop) const |
vector< IntrusivePtr< const LoopNest > > | compute_in_tiles (const FunctionDAG::Node *f, const LoopNest *parent, const Anderson2021Params &params, const Target &target, const SearchSpaceOptions &search_space_options, int v, bool in_realization, bool in_threads_loop, bool is_pre_pass, vector< int64_t > union_counts=vector< int64_t >()) const |
bool | has_constant_region_computed (const FunctionDAG::Node *node) const |
bool | has_constant_region_required (const FunctionDAG::Node *node) const |
bool | other_stage_has_same_producer (const FunctionDAG::Node *producer) const |
int | num_serial_loops (const FunctionDAG::Node::Stage *stage) const |
int | num_serial_loops () const |
bool | producer_computed_here_or_further_in (const FunctionDAG::Node *producer) const |
void | update_producers_to_be_staged (StageScheduleState &state, const NodeMap< bool > &all_inlined) const |
bool | region_computed_shrinks (const FunctionDAG::Node *f, const LoopNest *parent) const |
void | apply (LoopLevel here, StageMap< std::unique_ptr< StageScheduleState > > &state_map, double num_cores, int depth, const LoopNest *parent, const LoopNest *compute_site, const Target &target, std::vector< StageScheduleState * > &ancestors, const NodeMap< bool > &all_inlined) const |
double | max_idle_lane_wastage (const Target &target, GPULoopInfo gpu_loop_info) const |
bool | has_valid_thread_extents () const |
void | collect_nodes_that_should_be_inlined (const NodeMap< bool > &nodes_to_freeze, NodeMap< bool > &inlined_nodes) const |
void | collect_all_inlined (NodeMap< bool > &all_inlined) const |
int64_t | product_of_self_and_descendants (int loop_index) const |
int64_t | product_of_descendants (int loop_index) const |
void | get_stages_computed_in_each_compute_root_loop (StageMap< StageMap< bool > > &descendants, const LoopNest *compute_root_loop_nest=nullptr) const |
Static Public Member Functions | |
static void | hash_combine (uint64_t &h, uint64_t next) |
static void | hash_combine (uint64_t &h, uint64_t next) |
Public Attributes | |
RefCount | ref_count |
std::vector< int64_t > | size |
std::vector< IntrusivePtr< const LoopNest > > | children |
NodeMap< int64_t > | inlined |
std::set< const FunctionDAG::Node * > | store_at |
NodeMap< Bound > | bounds |
const FunctionDAG::Node * | node = nullptr |
const FunctionDAG::Node::Stage * | stage = nullptr |
bool | innermost = false |
bool | tileable = false |
bool | parallel = false |
int | vector_dim = -1 |
int | vectorized_loop_index = -1 |
std::map< uint64_t, StageMap< StageMap< FeatureIntermediates > > > | feature_intermediates_cache |
std::map< uint64_t, StageMap< ScheduleFeatures > > | features_cache |
vector< int64_t > | size |
vector< IntrusivePtr< const LoopNest > > | children |
GPU_parallelism | gpu_label = GPU_parallelism::None |
std::map< uint64_t, StageMap< StageMap< FeatureIntermediates > > > | feature_intermediates |
std::map< uint64_t, StageMap< ScheduleFeatures > > | features |
Definition at line 87 of file LoopNest.h.
void Halide::Internal::Autoscheduler::LoopNest::copy_from | ( | const LoopNest & | n | ) |
Referenced by Halide::Internal::Autoscheduler::deep_copy_loop_nest().
|
inline static |
Definition at line 79 of file LoopNest.h.
void Halide::Internal::Autoscheduler::LoopNest::structural_hash | ( | uint64_t & | h, |
int | depth ) const |
|
inline |
Definition at line 91 of file LoopNest.h.
void Halide::Internal::Autoscheduler::LoopNest::get_sites | ( | StageMap< Sites > & | sites, |
const LoopNest * | task = nullptr, | ||
const LoopNest * | parent = nullptr ) const |
|
inline |
Definition at line 120 of file LoopNest.h.
void Halide::Internal::Autoscheduler::LoopNest::compute_features | ( | const FunctionDAG & | dag, |
const Adams2019Params & | params, | ||
const StageMap< Sites > & | sites, | ||
int64_t | instances, | ||
int64_t | parallelism, | ||
const LoopNest * | parent, | ||
const LoopNest * | grandparent, | ||
const LoopNest & | root, | ||
int64_t * | working_set, | ||
StageMap< ScheduleFeatures > * | features, | ||
bool | use_cached_features ) const |
|
inline |
Definition at line 141 of file LoopNest.h.
References node.
|
inline |
Definition at line 148 of file LoopNest.h.
References bounds.
const Bound & Halide::Internal::Autoscheduler::LoopNest::get_bounds | ( | const FunctionDAG::Node * | f | ) | const |
void Halide::Internal::Autoscheduler::LoopNest::dump | ( | std::ostream & | os, |
string | prefix, | ||
const LoopNest * | parent ) const |
Referenced by Halide::Internal::Autoscheduler::Filter::Filter().
bool Halide::Internal::Autoscheduler::LoopNest::calls | ( | const FunctionDAG::Node * | f | ) | const |
int64_t Halide::Internal::Autoscheduler::LoopNest::max_inlined_calls | ( | ) | const |
bool Halide::Internal::Autoscheduler::LoopNest::accesses_input_buffer | ( | ) | const |
bool Halide::Internal::Autoscheduler::LoopNest::computes | ( | const FunctionDAG::Node * | f | ) | const |
void Halide::Internal::Autoscheduler::LoopNest::inline_func | ( | const FunctionDAG::Node * | f | ) |
void Halide::Internal::Autoscheduler::LoopNest::compute_here | ( | const FunctionDAG::Node * | f, |
bool | tileable, | ||
int | v, | ||
const Adams2019Params & | params ) |
IntrusivePtr< const LoopNest > Halide::Internal::Autoscheduler::LoopNest::parallelize_in_tiles | ( | const Adams2019Params & | params, |
const vector< int64_t > & | tiling, | ||
const LoopNest * | parent ) const |
std::vector< IntrusivePtr< const LoopNest > > Halide::Internal::Autoscheduler::LoopNest::compute_in_tiles | ( | const FunctionDAG::Node * | f, |
const LoopNest * | parent, | ||
const Adams2019Params & | params, | ||
int | v, | ||
bool | in_realization ) const |
void Halide::Internal::Autoscheduler::LoopNest::apply | ( | LoopLevel | here, |
StageMap< std::unique_ptr< StageScheduleState > > & | state_map, | ||
double | num_cores, | ||
int | depth, | ||
const LoopNest * | parent, | ||
const LoopNest * | compute_site ) const |
void Halide::Internal::Autoscheduler::LoopNest::copy_from_including_features | ( | const LoopNest & | n | ) |
void Halide::Internal::Autoscheduler::LoopNest::memoize_points_computed_minimum | ( | StageMap< ScheduleFeatures > & | memoized_features, |
const StageMap< ScheduleFeatures > * | features ) const |
void Halide::Internal::Autoscheduler::LoopNest::memoize_features | ( | StageMap< ScheduleFeatures > & | memoized_features, |
const StageMap< ScheduleFeatures > * | features_to_insert ) const |
void Halide::Internal::Autoscheduler::LoopNest::compute_working_set_from_features | ( | int64_t * | working_set, |
const StageMap< ScheduleFeatures > * | features ) const |
void Halide::Internal::Autoscheduler::LoopNest::recompute_inlined_features | ( | const StageMap< Sites > & | sites, |
StageMap< ScheduleFeatures > * | features ) const |
uint64_t Halide::Internal::Autoscheduler::LoopNest::compute_hash_of_producers_stored_at_root | ( | const StageMap< Sites > & | sites | ) | const |
std::vector< std::pair< int, int > > Halide::Internal::Autoscheduler::LoopNest::collect_producers | ( | const StageMap< Sites > & | sites | ) | const |
void Halide::Internal::Autoscheduler::LoopNest::collect_stages | ( | std::set< const FunctionDAG::Node::Stage * > & | stages | ) | const |
|
inline |
Definition at line 148 of file LoopNest.h.
References gpu_label, Halide::Target::has_gpu_feature(), and Halide::Internal::Autoscheduler::Serial.
|
inline |
Definition at line 152 of file LoopNest.h.
References gpu_label, Halide::Target::has_gpu_feature(), and Halide::Internal::Autoscheduler::Thread.
|
inline |
Definition at line 156 of file LoopNest.h.
References Halide::Internal::Autoscheduler::Block, gpu_label, and Halide::Target::has_gpu_feature().
|
inline |
Definition at line 160 of file LoopNest.h.
References size.
vector< int64_t > Halide::Internal::Autoscheduler::LoopNest::get_union_thread_counts | ( | const FunctionDAG::Node * | f | ) | const |
void Halide::Internal::Autoscheduler::LoopNest::get_stage_sizes | ( | const FunctionDAG::Node * | f, |
vector< vector< int64_t > > & | stage_sizes, | ||
vector< vector< int > > & | pure_dims, | ||
vector< int > & | vectorized_indices ) const |
void Halide::Internal::Autoscheduler::LoopNest::generate_vec_dim_serial_tilings | ( | vector< int > & | serial_sizes | ) | const |
bool Halide::Internal::Autoscheduler::LoopNest::add_gpu_thread_tilings | ( | const FunctionDAG::Node * | f, |
const Anderson2021Params & | params, | ||
const Target & | target, | ||
int | v, | ||
vector< IntrusivePtr< const LoopNest > > & | result, | ||
const vector< int64_t > & | max_size ) |
void Halide::Internal::Autoscheduler::LoopNest::copy_from | ( | const LoopNest & | n | ) |
void Halide::Internal::Autoscheduler::LoopNest::copy_from_including_features | ( | const LoopNest & | n | ) |
|
inline static |
Definition at line 193 of file LoopNest.h.
void Halide::Internal::Autoscheduler::LoopNest::structural_hash | ( | uint64_t & | h, |
int | depth ) const |
|
inline |
Definition at line 205 of file LoopNest.h.
GPUMemoryType Halide::Internal::Autoscheduler::LoopNest::get_gpu_memory_type | ( | bool | in_block, |
bool | in_thread, | ||
bool | is_inlined = false ) const |
std::vector< int > Halide::Internal::Autoscheduler::LoopNest::unrolled_loops | ( | const Target & | target, |
const LoopNest * | parent, | ||
const LoopNest * | grandparent ) const |
void Halide::Internal::Autoscheduler::LoopNest::get_allocs_that_can_be_promoted_to_registers | ( | const Target & | target, |
StageMap< Sites > & | sites, | ||
NodeMap< bool > & | can_be_promoted_to_registers, | ||
const LoopNest * | grandparent, | ||
const LoopNest * | parent ) const |
bool Halide::Internal::Autoscheduler::LoopNest::promote_allocs_to_registers | ( | const Target & | target, |
StageMap< Sites > & | sites ) const |
void Halide::Internal::Autoscheduler::LoopNest::get_sites | ( | const Target & | target, |
StageMap< Sites > & | sites, | ||
StageMap< int64_t > & | shared_mem_alloc_sizes, | ||
const LoopNest * | task = nullptr, | ||
const LoopNest * | parent = nullptr, | ||
const LoopNest * | current_thread_loop = nullptr ) const |
|
inline |
Definition at line 271 of file LoopNest.h.
bool Halide::Internal::Autoscheduler::LoopNest::exceeds_serial_extents_limit | ( | const Target & | target, |
const LoopNest * | parent, | ||
bool | in_threads_loop ) const |
bool Halide::Internal::Autoscheduler::LoopNest::node_has_dynamic_region_computed | ( | const FunctionDAG::Node * | f | ) | const |
bool Halide::Internal::Autoscheduler::LoopNest::has_dynamic_allocation_inside_thread | ( | bool | in_thread_loop | ) | const |
const LoopNest * Halide::Internal::Autoscheduler::LoopNest::find_pure_stage_loop_nest | ( | const FunctionDAG::Node * | node | ) | const |
int Halide::Internal::Autoscheduler::LoopNest::get_pure_stage_vectorized_loop_index | ( | const FunctionDAG::Node * | node | ) | const |
int Halide::Internal::Autoscheduler::LoopNest::get_vectorized_loop_index_from_pure_stage | ( | const LoopNest & | root | ) | const |
double Halide::Internal::Autoscheduler::LoopNest::storage_stride | ( | const LoadJacobian & | jac, |
int | innermost_storage_dim, | ||
const FunctionDAG::Node * | storage_node, | ||
const Bound & | store_bounds, | ||
const LoopNest & | root ) const |
Strides Halide::Internal::Autoscheduler::LoopNest::compute_strides | ( | const LoadJacobian & | jac, |
int | innermost_storage_dim, | ||
const FunctionDAG::Node * | storage_node, | ||
const Bound & | store_bounds, | ||
const ThreadInfo * | thread_info, | ||
bool | verbose = false ) const |
bool Halide::Internal::Autoscheduler::LoopNest::all_strides_exist | ( | const LoadJacobian & | jac, |
const FunctionDAG::Node * | storage_node, | ||
const LoopNest & | root ) const |
int Halide::Internal::Autoscheduler::LoopNest::get_actual_vector_dim | ( | const Bound & | store_bounds | ) | const |
void Halide::Internal::Autoscheduler::LoopNest::compute_gpu_store_features | ( | const LoadJacobian & | jac, |
int | consumer_innermost_dim, | ||
const FunctionDAG::Node * | node, | ||
const Bound & | consumer_store_bounds, | ||
const GPULoopInfo & | gpu_loop_info, | ||
const std::vector< int64_t > & | inner_serial_loop_extents, | ||
const Sites & | consumer_site, | ||
ScheduleFeatures & | feat, | ||
const LoopNest * | parent, | ||
const LoopNest & | root, | ||
GlobalMemInfo & | global_mem_loads, | ||
SharedMemInfo & | shared_mem_loads, | ||
LocalMemInfo & | local_mem_loads, | ||
bool | verbose = false ) const |
bool Halide::Internal::Autoscheduler::LoopNest::can_vectorize_access_for_innermost_dim | ( | const LoadJacobian & | jac, |
const FunctionDAG::Node * | accessed, | ||
int | innermost_dim, | ||
int | loop_index ) const |
bool Halide::Internal::Autoscheduler::LoopNest::can_vectorize_store_access | ( | const LoadJacobian & | jac, |
const FunctionDAG::Node * | accessed, | ||
bool | accessed_has_been_scheduled, | ||
int | innermost_dim, | ||
int | loop_index, | ||
const GPUMemoryType & | mem_type ) const |
int Halide::Internal::Autoscheduler::LoopNest::vectorized_load_access_size | ( | const LoadJacobian & | jac, |
const FunctionDAG::Node * | accessed, | ||
bool | accessed_has_been_scheduled, | ||
int | innermost_dim, | ||
const GPUMemoryType & | mem_type, | ||
bool | verbose = false ) const |
int Halide::Internal::Autoscheduler::LoopNest::vectorized_access_size | ( | size_t | loop_index, |
bool | verbose = false ) const |
void Halide::Internal::Autoscheduler::LoopNest::compute_num_mem_accesses_per_block | ( | const LoadJacobian & | jac, |
const FunctionDAG::Node * | node, | ||
const Bound & | store_bounds, | ||
const ThreadInfo * | thread_info, | ||
int | innermost_dim, | ||
double | num_requests_per_warp, | ||
MemInfoType< T > & | mem_info, | ||
bool | verbose = false ) const |
std::pair< double, double > Halide::Internal::Autoscheduler::LoopNest::compute_local_mem_store_features | ( | const LoadJacobian & | jac, |
int | consumer_innermost_dim, | ||
const FunctionDAG::Node * | node, | ||
const Bound & | consumer_store_bounds, | ||
const LoopNest & | root, | ||
double | serial_loop_extents ) const |
MemInfoType< T > Halide::Internal::Autoscheduler::LoopNest::compute_mem_store_info | ( | const LoadJacobian & | jac, |
int | consumer_innermost_dim, | ||
const FunctionDAG::Node * | node, | ||
const Bound & | consumer_store_bounds, | ||
const ThreadInfo * | thread_info, | ||
double | serial_loop_extents, | ||
bool | verbose ) const |
void Halide::Internal::Autoscheduler::LoopNest::compute_mem_load_features | ( | const LoadJacobian & | jac, |
int | producer_innermost_dim, | ||
const FunctionDAG::Node * | node, | ||
const Bound & | producer_store_bounds, | ||
bool | producer_has_been_scheduled, | ||
const ThreadInfo * | thread_info, | ||
MemInfoType< T > & | mem_info, | ||
double | serial_loop_extents, | ||
bool | verbose = false ) const |
double Halide::Internal::Autoscheduler::LoopNest::compute_local_mem_stride | ( | double | stride, |
double | bytes ) const |
const LoopNest * Halide::Internal::Autoscheduler::LoopNest::get_enclosing_block | ( | const LoopNest * | parent, |
const LoopNest * | grandparent ) const |
std::pair< int64_t, int64_t > Halide::Internal::Autoscheduler::LoopNest::get_block_and_serial_extents | ( | const LoopNest * | block | ) | const |
bool Halide::Internal::Autoscheduler::LoopNest::all_paths_to_leaves_have_thread_loop | ( | ) | const |
bool Halide::Internal::Autoscheduler::LoopNest::has_thread_loop_descendant | ( | ) | const |
void Halide::Internal::Autoscheduler::LoopNest::compute_warp_features | ( | ScheduleFeatures & | features, |
const GPULoopInfo & | gpu_loop_info ) const |
void Halide::Internal::Autoscheduler::LoopNest::compute_warp_and_block_occupancy | ( | const Anderson2021Params & | params, |
ScheduleFeatures & | feat, | ||
const GPULoopInfo & | gpu_loop_info ) const |
void Halide::Internal::Autoscheduler::LoopNest::compute_shared_mem_occupancy | ( | const Anderson2021Params & | params, |
const Target & | target, | ||
int64_t | total_shared_mem_alloc_size, | ||
ScheduleFeatures & | feat ) const |
std::pair< const LoopNest *, const LoopNest * > Halide::Internal::Autoscheduler::LoopNest::find_innermost_and_parent | ( | ) | const |
int64_t Halide::Internal::Autoscheduler::LoopNest::points_accessed_per_thread | ( | const Anderson2021Params & | params, |
const Target & | target, | ||
const GPULoopInfo & | gpu_loop_info, | ||
const std::vector< const FunctionDAG::Edge * > & | edge_chain, | ||
const LoadJacobian & | jac, | ||
const LoopNest * | parent, | ||
const LoopNest * | grandparent, | ||
int64_t | n, | ||
const ScheduleFeatures & | feat, | ||
const LoadJacobian & | serial_jac, | ||
bool | producer_has_been_scheduled, | ||
int | producer_innermost_dim, | ||
const GPUMemoryType & | mem_type, | ||
bool | verbose ) const |
int64_t Halide::Internal::Autoscheduler::LoopNest::compute_licm_amortization | ( | const LoopNest * | innermost, |
const LoopNest * | parent, | ||
const ScheduleFeatures & | feat, | ||
const LoadJacobian & | jac, | ||
int | producer_dims ) const |
void Halide::Internal::Autoscheduler::LoopNest::memoize_points_computed_minimum | ( | StageMap< ScheduleFeatures > & | memoized_features, |
const StageMap< ScheduleFeatures > * | features ) const |
vector< pair< int, int > > Halide::Internal::Autoscheduler::LoopNest::collect_producers | ( | const StageMap< Sites > & | sites | ) | const |
uint64_t Halide::Internal::Autoscheduler::LoopNest::compute_hash_of_producers_stored_at_root | ( | const StageMap< Sites > & | sites | ) | const |
void Halide::Internal::Autoscheduler::LoopNest::collect_stages | ( | std::set< const FunctionDAG::Node::Stage * > & | stages | ) | const |
void Halide::Internal::Autoscheduler::LoopNest::memoize_features | ( | StageMap< ScheduleFeatures > & | memoized_features, |
const StageMap< ScheduleFeatures > * | features ) const |
void Halide::Internal::Autoscheduler::LoopNest::compute_working_set_from_features | ( | int64_t * | working_set, |
const StageMap< ScheduleFeatures > * | features ) const |
void Halide::Internal::Autoscheduler::LoopNest::recompute_inlined_features | ( | const StageMap< Sites > & | sites, |
StageMap< ScheduleFeatures > * | features ) const |
std::pair< int64_t, bool > Halide::Internal::Autoscheduler::LoopNest::compute_alloc_size_of_node_here | ( | const FunctionDAG::Node * | f | ) | const |
void Halide::Internal::Autoscheduler::LoopNest::compute_features | ( | const FunctionDAG & | dag, |
const Anderson2021Params & | params, | ||
const Target & | target, | ||
const StageMap< Sites > & | sites, | ||
int64_t | instances, | ||
int64_t | parallelism, | ||
const LoopNest * | parent, | ||
const LoopNest * | grandparent, | ||
const LoopNest & | root, | ||
GPULoopInfo | gpu_loop_info, | ||
bool | use_memoized_features, | ||
const StageMap< int64_t > & | total_shared_mem_alloc_sizes, | ||
int64_t * | working_set, | ||
int64_t * | working_set_local_constant, | ||
int64_t * | working_set_local_dynamic, | ||
StageMap< ScheduleFeatures > * | features, | ||
Statistics & | stats, | ||
bool | verbose = false ) const |
|
inline |
Definition at line 477 of file LoopNest.h.
References node.
|
inline |
Definition at line 484 of file LoopNest.h.
References bounds.
const Bound & Halide::Internal::Autoscheduler::LoopNest::get_bounds | ( | const FunctionDAG::Node * | f | ) | const |
Bound Halide::Internal::Autoscheduler::LoopNest::get_bounds_along_edge_chain | ( | const FunctionDAG::Node * | f, |
const vector< const FunctionDAG::Edge * > & | edge_chain ) const |
void Halide::Internal::Autoscheduler::LoopNest::dump | ( | ) | const |
std::string Halide::Internal::Autoscheduler::LoopNest::to_string | ( | ) | const |
void Halide::Internal::Autoscheduler::LoopNest::dump | ( | T & | stream, |
string | prefix, | ||
const LoopNest * | parent ) const |
bool Halide::Internal::Autoscheduler::LoopNest::calls | ( | const FunctionDAG::Node * | f | ) | const |
int64_t Halide::Internal::Autoscheduler::LoopNest::max_inlined_calls | ( | ) | const |
bool Halide::Internal::Autoscheduler::LoopNest::accesses_input_buffer | ( | ) | const |
bool Halide::Internal::Autoscheduler::LoopNest::computes | ( | const FunctionDAG::Node * | f | ) | const |
void Halide::Internal::Autoscheduler::LoopNest::inline_func | ( | const FunctionDAG::Node * | f | ) |
bool Halide::Internal::Autoscheduler::LoopNest::compute_here | ( | const FunctionDAG::Node * | f, |
bool | tileable, | ||
int | v, | ||
bool | in_threads_loop, | ||
const Anderson2021Params & | params, | ||
const Target & | target ) |
IntrusivePtr< const LoopNest > Halide::Internal::Autoscheduler::LoopNest::parallelize_in_tiles | ( | const vector< int64_t > & | tiling, |
const LoopNest * | parent, | ||
const Anderson2021Params & | params, | ||
const Target & | target, | ||
bool | inner_tiling, | ||
bool | adjust_tiling, | ||
bool | move_all_rvars_inward = true, | ||
const vector< int > & | rvars_to_move_inward = {} ) const |
int64_t Halide::Internal::Autoscheduler::LoopNest::get_total_local_mem_alloc_size | ( | bool | constant_allocs_only = false, |
bool | in_threads_loop = false ) const |
int64_t Halide::Internal::Autoscheduler::LoopNest::get_total_constant_local_mem_alloc_size | ( | ) | const |
bool Halide::Internal::Autoscheduler::LoopNest::requires_dynamic_allocation | ( | const FunctionDAG::Node * | f, |
const Target & | target, | ||
bool | in_threads_loop ) const |
vector< IntrusivePtr< const LoopNest > > Halide::Internal::Autoscheduler::LoopNest::compute_in_tiles | ( | const FunctionDAG::Node * | f, |
const LoopNest * | parent, | ||
const Anderson2021Params & | params, | ||
const Target & | target, | ||
const SearchSpaceOptions & | search_space_options, | ||
int | v, | ||
bool | in_realization, | ||
bool | in_threads_loop, | ||
bool | is_pre_pass, | ||
vector< int64_t > | union_counts = vector< int64_t >() ) const |
bool Halide::Internal::Autoscheduler::LoopNest::has_constant_region_computed | ( | const FunctionDAG::Node * | node | ) | const |
bool Halide::Internal::Autoscheduler::LoopNest::has_constant_region_required | ( | const FunctionDAG::Node * | node | ) | const |
bool Halide::Internal::Autoscheduler::LoopNest::other_stage_has_same_producer | ( | const FunctionDAG::Node * | producer | ) | const |
int Halide::Internal::Autoscheduler::LoopNest::num_serial_loops | ( | const FunctionDAG::Node::Stage * | stage | ) | const |
int Halide::Internal::Autoscheduler::LoopNest::num_serial_loops | ( | ) | const |
bool Halide::Internal::Autoscheduler::LoopNest::producer_computed_here_or_further_in | ( | const FunctionDAG::Node * | producer | ) | const |
void Halide::Internal::Autoscheduler::LoopNest::update_producers_to_be_staged | ( | StageScheduleState & | state, |
const NodeMap< bool > & | all_inlined ) const |
bool Halide::Internal::Autoscheduler::LoopNest::region_computed_shrinks | ( | const FunctionDAG::Node * | f, |
const LoopNest * | parent ) const |
void Halide::Internal::Autoscheduler::LoopNest::apply | ( | LoopLevel | here, |
StageMap< std::unique_ptr< StageScheduleState > > & | state_map, | ||
double | num_cores, | ||
int | depth, | ||
const LoopNest * | parent, | ||
const LoopNest * | compute_site, | ||
const Target & | target, | ||
std::vector< StageScheduleState * > & | ancestors, | ||
const NodeMap< bool > & | all_inlined ) const |
double Halide::Internal::Autoscheduler::LoopNest::max_idle_lane_wastage | ( | const Target & | target, |
GPULoopInfo | gpu_loop_info ) const |
bool Halide::Internal::Autoscheduler::LoopNest::has_valid_thread_extents | ( | ) | const |
void Halide::Internal::Autoscheduler::LoopNest::collect_nodes_that_should_be_inlined | ( | const NodeMap< bool > & | nodes_to_freeze, |
NodeMap< bool > & | inlined_nodes ) const |
void Halide::Internal::Autoscheduler::LoopNest::collect_all_inlined | ( | NodeMap< bool > & | all_inlined | ) | const |
int64_t Halide::Internal::Autoscheduler::LoopNest::product_of_self_and_descendants | ( | int | loop_index | ) | const |
int64_t Halide::Internal::Autoscheduler::LoopNest::product_of_descendants | ( | int | loop_index | ) | const |
void Halide::Internal::Autoscheduler::LoopNest::get_stages_computed_in_each_compute_root_loop | ( | StageMap< StageMap< bool > > & | descendants, |
const LoopNest * | compute_root_loop_nest = nullptr ) const |
|
mutable |
Definition at line 35 of file LoopNest.h.
std::vector<int64_t> Halide::Internal::Autoscheduler::LoopNest::size |
Definition at line 39 of file LoopNest.h.
Referenced by is_scalar().
std::vector<IntrusivePtr<const LoopNest> > Halide::Internal::Autoscheduler::LoopNest::children |
Definition at line 42 of file LoopNest.h.
Referenced by Halide::Internal::Autoscheduler::deep_copy_loop_nest(), funcs_realized_or_inlined(), and set_working_set_at_task_feature().
Definition at line 46 of file LoopNest.h.
Referenced by funcs_realized_or_inlined().
std::set< const FunctionDAG::Node * > Halide::Internal::Autoscheduler::LoopNest::store_at |
Definition at line 49 of file LoopNest.h.
Referenced by funcs_realized_or_inlined().
Definition at line 54 of file LoopNest.h.
Referenced by set_bounds().
const FunctionDAG::Node * Halide::Internal::Autoscheduler::LoopNest::node = nullptr |
Definition at line 57 of file LoopNest.h.
Referenced by is_root().
const FunctionDAG::Node::Stage * Halide::Internal::Autoscheduler::LoopNest::stage = nullptr |
Definition at line 60 of file LoopNest.h.
bool Halide::Internal::Autoscheduler::LoopNest::innermost = false |
Definition at line 63 of file LoopNest.h.
bool Halide::Internal::Autoscheduler::LoopNest::tileable = false |
Definition at line 66 of file LoopNest.h.
bool Halide::Internal::Autoscheduler::LoopNest::parallel = false |
Definition at line 69 of file LoopNest.h.
int Halide::Internal::Autoscheduler::LoopNest::vector_dim = -1 |
Definition at line 72 of file LoopNest.h.
int Halide::Internal::Autoscheduler::LoopNest::vectorized_loop_index = -1 |
Definition at line 75 of file LoopNest.h.
|
mutable |
Definition at line 265 of file LoopNest.h.
|
mutable |
Definition at line 267 of file LoopNest.h.
vector<int64_t> Halide::Internal::Autoscheduler::LoopNest::size |
Definition at line 92 of file LoopNest.h.
vector<IntrusivePtr<const LoopNest> > Halide::Internal::Autoscheduler::LoopNest::children |
Definition at line 95 of file LoopNest.h.
|
mutable |
Definition at line 131 of file LoopNest.h.
Referenced by is_gpu_block(), is_gpu_serial(), and is_gpu_thread().
|
mutable |
Definition at line 145 of file LoopNest.h.
|
mutable |
Definition at line 146 of file LoopNest.h.
Referenced by set_working_set_at_task_feature().