#include <LoopNest.h>

Classes
struct	FeatureIntermediates
struct	Sites
struct	StageScheduleState

Public Member Functions
void	copy_from (const LoopNest &n)
void	structural_hash (uint64_t &h, int depth) const
size_t	funcs_realized_or_inlined () const
void	get_sites (StageMap< Sites > &sites, const LoopNest *task=nullptr, const LoopNest *parent=nullptr) const
void	set_working_set_at_task_feature (int64_t working_set, StageMap< ScheduleFeatures > *features) const
void	compute_features (const FunctionDAG &dag, const Adams2019Params &params, const StageMap< Sites > &sites, int64_t instances, int64_t parallelism, const LoopNest *parent, const LoopNest *grandparent, const LoopNest &root, int64_t *working_set, StageMap< ScheduleFeatures > *features, bool use_cached_features) const
bool	is_root () const
const Bound &	set_bounds (const FunctionDAG::Node *f, BoundContents *b) const
const Bound &	get_bounds (const FunctionDAG::Node *f) const
void	dump (std::ostream &os, string prefix, const LoopNest *parent) const
bool	calls (const FunctionDAG::Node *f) const
int64_t	max_inlined_calls () const
bool	accesses_input_buffer () const
bool	computes (const FunctionDAG::Node *f) const
void	inline_func (const FunctionDAG::Node *f)
void	compute_here (const FunctionDAG::Node *f, bool tileable, int v, const Adams2019Params &params)
IntrusivePtr< const LoopNest >	parallelize_in_tiles (const Adams2019Params &params, const vector< int64_t > &tiling, const LoopNest *parent) const
std::vector< IntrusivePtr< const LoopNest > >	compute_in_tiles (const FunctionDAG::Node *f, const LoopNest *parent, const Adams2019Params &params, int v, bool in_realization) const
void	apply (LoopLevel here, StageMap< std::unique_ptr< StageScheduleState > > &state_map, double num_cores, int depth, const LoopNest *parent, const LoopNest *compute_site) const
void	copy_from_including_features (const LoopNest &n)
void	memoize_points_computed_minimum (StageMap< ScheduleFeatures > &memoized_features, const StageMap< ScheduleFeatures > *features) const
void	memoize_features (StageMap< ScheduleFeatures > &memoized_features, const StageMap< ScheduleFeatures > *features_to_insert) const
void	compute_working_set_from_features (int64_t *working_set, const StageMap< ScheduleFeatures > *features) const
void	recompute_inlined_features (const StageMap< Sites > &sites, StageMap< ScheduleFeatures > *features) const
uint64_t	compute_hash_of_producers_stored_at_root (const StageMap< Sites > &sites) const
std::vector< std::pair< int, int > >	collect_producers (const StageMap< Sites > &sites) const
void	collect_stages (std::set< const FunctionDAG::Node::Stage * > &stages) const
bool	is_gpu_serial (const Target &target) const
bool	is_gpu_thread (const Target &target) const
bool	is_gpu_block (const Target &target) const
bool	is_scalar () const
vector< int64_t >	get_union_thread_counts (const FunctionDAG::Node *f) const
void	get_stage_sizes (const FunctionDAG::Node *f, vector< vector< int64_t > > &stage_sizes, vector< vector< int > > &pure_dims, vector< int > &vectorized_indices) const
void	generate_vec_dim_serial_tilings (vector< int > &serial_sizes) const
bool	add_gpu_thread_tilings (const FunctionDAG::Node *f, const Anderson2021Params &params, const Target &target, int v, vector< IntrusivePtr< const LoopNest > > &result, const vector< int64_t > &max_size)
void	copy_from (const LoopNest &n)
void	copy_from_including_features (const LoopNest &n)
void	structural_hash (uint64_t &h, int depth) const
size_t	funcs_realized_or_inlined () const
GPUMemoryType	get_gpu_memory_type (bool in_block, bool in_thread, bool is_inlined=false) const
std::vector< int >	unrolled_loops (const Target &target, const LoopNest *parent, const LoopNest *grandparent) const
void	get_allocs_that_can_be_promoted_to_registers (const Target &target, StageMap< Sites > &sites, NodeMap< bool > &can_be_promoted_to_registers, const LoopNest *grandparent, const LoopNest *parent) const
bool	promote_allocs_to_registers (const Target &target, StageMap< Sites > &sites) const
void	get_sites (const Target &target, StageMap< Sites > &sites, StageMap< int64_t > &shared_mem_alloc_sizes, const LoopNest *task=nullptr, const LoopNest *parent=nullptr, const LoopNest *current_thread_loop=nullptr) const
void	set_working_set_at_task_feature (int64_t working_set, StageMap< ScheduleFeatures > *features) const
bool	exceeds_serial_extents_limit (const Target &target, const LoopNest *parent, bool in_threads_loop) const
bool	node_has_dynamic_region_computed (const FunctionDAG::Node *f) const
bool	has_dynamic_allocation_inside_thread (bool in_thread_loop) const
const LoopNest *	find_pure_stage_loop_nest (const FunctionDAG::Node *node) const
int	get_pure_stage_vectorized_loop_index (const FunctionDAG::Node *node) const
int	get_vectorized_loop_index_from_pure_stage (const LoopNest &root) const
double	storage_stride (const LoadJacobian &jac, int innermost_storage_dim, const FunctionDAG::Node *storage_node, const Bound &store_bounds, const LoopNest &root) const
Strides	compute_strides (const LoadJacobian &jac, int innermost_storage_dim, const FunctionDAG::Node *storage_node, const Bound &store_bounds, const ThreadInfo *thread_info, bool verbose=false) const
bool	all_strides_exist (const LoadJacobian &jac, const FunctionDAG::Node *storage_node, const LoopNest &root) const
int	get_actual_vector_dim (const Bound &store_bounds) const
void	compute_gpu_store_features (const LoadJacobian &jac, int consumer_innermost_dim, const FunctionDAG::Node *node, const Bound &consumer_store_bounds, const GPULoopInfo &gpu_loop_info, const std::vector< int64_t > &inner_serial_loop_extents, const Sites &consumer_site, ScheduleFeatures &feat, const LoopNest *parent, const LoopNest &root, GlobalMemInfo &global_mem_loads, SharedMemInfo &shared_mem_loads, LocalMemInfo &local_mem_loads, bool verbose=false) const
bool	can_vectorize_access_for_innermost_dim (const LoadJacobian &jac, const FunctionDAG::Node *accessed, int innermost_dim, int loop_index) const
bool	can_vectorize_store_access (const LoadJacobian &jac, const FunctionDAG::Node *accessed, bool accessed_has_been_scheduled, int innermost_dim, int loop_index, const GPUMemoryType &mem_type) const
int	vectorized_load_access_size (const LoadJacobian &jac, const FunctionDAG::Node *accessed, bool accessed_has_been_scheduled, int innermost_dim, const GPUMemoryType &mem_type, bool verbose=false) const
int	vectorized_access_size (size_t loop_index, bool verbose=false) const
template<typename T>
void	compute_num_mem_accesses_per_block (const LoadJacobian &jac, const FunctionDAG::Node *node, const Bound &store_bounds, const ThreadInfo *thread_info, int innermost_dim, double num_requests_per_warp, MemInfoType< T > &mem_info, bool verbose=false) const
std::pair< double, double >	compute_local_mem_store_features (const LoadJacobian &jac, int consumer_innermost_dim, const FunctionDAG::Node *node, const Bound &consumer_store_bounds, const LoopNest &root, double serial_loop_extents) const
template<typename T>
MemInfoType< T >	compute_mem_store_info (const LoadJacobian &jac, int consumer_innermost_dim, const FunctionDAG::Node *node, const Bound &consumer_store_bounds, const ThreadInfo *thread_info, double serial_loop_extents, bool verbose) const
template<typename T>
void	compute_mem_load_features (const LoadJacobian &jac, int producer_innermost_dim, const FunctionDAG::Node *node, const Bound &producer_store_bounds, bool producer_has_been_scheduled, const ThreadInfo *thread_info, MemInfoType< T > &mem_info, double serial_loop_extents, bool verbose=false) const
double	compute_local_mem_stride (double stride, double bytes) const
const LoopNest *	get_enclosing_block (const LoopNest *parent, const LoopNest *grandparent) const
std::pair< int64_t, int64_t >	get_block_and_serial_extents (const LoopNest *block) const
bool	all_paths_to_leaves_have_thread_loop () const
bool	has_thread_loop_descendant () const
void	compute_warp_features (ScheduleFeatures &features, const GPULoopInfo &gpu_loop_info) const
void	compute_warp_and_block_occupancy (const Anderson2021Params &params, ScheduleFeatures &feat, const GPULoopInfo &gpu_loop_info) const
void	compute_shared_mem_occupancy (const Anderson2021Params &params, const Target &target, int64_t total_shared_mem_alloc_size, ScheduleFeatures &feat) const
std::pair< const LoopNest *, const LoopNest * >	find_innermost_and_parent () const
int64_t	points_accessed_per_thread (const Anderson2021Params &params, const Target &target, const GPULoopInfo &gpu_loop_info, const std::vector< const FunctionDAG::Edge * > &edge_chain, const LoadJacobian &jac, const LoopNest *parent, const LoopNest *grandparent, int64_t n, const ScheduleFeatures &feat, const LoadJacobian &serial_jac, bool producer_has_been_scheduled, int producer_innermost_dim, const GPUMemoryType &mem_type, bool verbose) const
int64_t	compute_licm_amortization (const LoopNest *innermost, const LoopNest *parent, const ScheduleFeatures &feat, const LoadJacobian &jac, int producer_dims) const
void	memoize_points_computed_minimum (StageMap< ScheduleFeatures > &memoized_features, const StageMap< ScheduleFeatures > *features) const
vector< pair< int, int > >	collect_producers (const StageMap< Sites > &sites) const
uint64_t	compute_hash_of_producers_stored_at_root (const StageMap< Sites > &sites) const
void	collect_stages (std::set< const FunctionDAG::Node::Stage * > &stages) const
void	memoize_features (StageMap< ScheduleFeatures > &memoized_features, const StageMap< ScheduleFeatures > *features) const
void	compute_working_set_from_features (int64_t *working_set, const StageMap< ScheduleFeatures > *features) const
void	recompute_inlined_features (const StageMap< Sites > &sites, StageMap< ScheduleFeatures > *features) const
std::pair< int64_t, bool >	compute_alloc_size_of_node_here (const FunctionDAG::Node *f) const
void	compute_features (const FunctionDAG &dag, const Anderson2021Params &params, const Target &target, const StageMap< Sites > &sites, int64_t instances, int64_t parallelism, const LoopNest *parent, const LoopNest *grandparent, const LoopNest &root, GPULoopInfo gpu_loop_info, bool use_memoized_features, const StageMap< int64_t > &total_shared_mem_alloc_sizes, int64_t *working_set, int64_t *working_set_local_constant, int64_t *working_set_local_dynamic, StageMap< ScheduleFeatures > *features, Statistics &stats, bool verbose=false) const
bool	is_root () const
const Bound &	set_bounds (const FunctionDAG::Node *f, BoundContents *b) const
const Bound &	get_bounds (const FunctionDAG::Node *f) const
Bound	get_bounds_along_edge_chain (const FunctionDAG::Node *f, const vector< const FunctionDAG::Edge * > &edge_chain) const
void	dump () const
std::string	to_string () const
template<typename T>
void	dump (T &stream, string prefix, const LoopNest *parent) const
bool	calls (const FunctionDAG::Node *f) const
int64_t	max_inlined_calls () const
bool	accesses_input_buffer () const
bool	computes (const FunctionDAG::Node *f) const
void	inline_func (const FunctionDAG::Node *f)
bool	compute_here (const FunctionDAG::Node *f, bool tileable, int v, bool in_threads_loop, const Anderson2021Params &params, const Target &target)
IntrusivePtr< const LoopNest >	parallelize_in_tiles (const vector< int64_t > &tiling, const LoopNest *parent, const Anderson2021Params &params, const Target &target, bool inner_tiling, bool adjust_tiling, bool move_all_rvars_inward=true, const vector< int > &rvars_to_move_inward={}) const
int64_t	get_total_local_mem_alloc_size (bool constant_allocs_only=false, bool in_threads_loop=false) const
int64_t	get_total_constant_local_mem_alloc_size () const
bool	requires_dynamic_allocation (const FunctionDAG::Node *f, const Target &target, bool in_threads_loop) const
vector< IntrusivePtr< const LoopNest > >	compute_in_tiles (const FunctionDAG::Node *f, const LoopNest *parent, const Anderson2021Params &params, const Target &target, const SearchSpaceOptions &search_space_options, int v, bool in_realization, bool in_threads_loop, bool is_pre_pass, vector< int64_t > union_counts=vector< int64_t >()) const
bool	has_constant_region_computed (const FunctionDAG::Node *node) const
bool	has_constant_region_required (const FunctionDAG::Node *node) const
bool	other_stage_has_same_producer (const FunctionDAG::Node *producer) const
int	num_serial_loops (const FunctionDAG::Node::Stage *stage) const
int	num_serial_loops () const
bool	producer_computed_here_or_further_in (const FunctionDAG::Node *producer) const
void	update_producers_to_be_staged (StageScheduleState &state, const NodeMap< bool > &all_inlined) const
bool	region_computed_shrinks (const FunctionDAG::Node *f, const LoopNest *parent) const
void	apply (LoopLevel here, StageMap< std::unique_ptr< StageScheduleState > > &state_map, double num_cores, int depth, const LoopNest *parent, const LoopNest *compute_site, const Target &target, std::vector< StageScheduleState * > &ancestors, const NodeMap< bool > &all_inlined) const
double	max_idle_lane_wastage (const Target &target, GPULoopInfo gpu_loop_info) const
bool	has_valid_thread_extents () const
void	collect_nodes_that_should_be_inlined (const NodeMap< bool > &nodes_to_freeze, NodeMap< bool > &inlined_nodes) const
void	collect_all_inlined (NodeMap< bool > &all_inlined) const
int64_t	product_of_self_and_descendants (int loop_index) const
int64_t	product_of_descendants (int loop_index) const
void	get_stages_computed_in_each_compute_root_loop (StageMap< StageMap< bool > > &descendants, const LoopNest *compute_root_loop_nest=nullptr) const

Static Public Member Functions
static void	hash_combine (uint64_t &h, uint64_t next)
static void	hash_combine (uint64_t &h, uint64_t next)

Public Attributes
RefCount	ref_count
std::vector< int64_t >	size
std::vector< IntrusivePtr< const LoopNest > >	children
NodeMap< int64_t >	inlined
std::set< const FunctionDAG::Node * >	store_at
NodeMap< Bound >	bounds
const FunctionDAG::Node *	node = nullptr
const FunctionDAG::Node::Stage *	stage = nullptr
bool	innermost = false
bool	tileable = false
bool	parallel = false
int	vector_dim = -1
int	vectorized_loop_index = -1
std::map< uint64_t, StageMap< StageMap< FeatureIntermediates > > >	feature_intermediates_cache
std::map< uint64_t, StageMap< ScheduleFeatures > >	features_cache
vector< int64_t >	size
vector< IntrusivePtr< const LoopNest > >	children
GPU_parallelism	gpu_label = GPU_parallelism::None
std::map< uint64_t, StageMap< StageMap< FeatureIntermediates > > >	feature_intermediates
std::map< uint64_t, StageMap< ScheduleFeatures > >	features

Detailed Description

Definition at line 34 of file LoopNest.h.

Member Function Documentation

◆ copy_from() [1/2]

void Halide::Internal::Autoscheduler::LoopNest::copy_from ( const LoopNest & n )

Referenced by Halide::Internal::Autoscheduler::deep_copy_loop_nest().

◆ hash_combine() [1/2]

void Halide::Internal::Autoscheduler::LoopNest::hash_combine	(	uint64_t &	h,
		uint64_t	next )

inline static

Definition at line 79 of file LoopNest.h.

◆ structural_hash() [1/2]

void Halide::Internal::Autoscheduler::LoopNest::structural_hash	(	uint64_t &	h,
		int	depth ) const

◆ funcs_realized_or_inlined() [1/2]

size_t Halide::Internal::Autoscheduler::LoopNest::funcs_realized_or_inlined ( ) const

inline

Definition at line 91 of file LoopNest.h.

References children, inlined, and store_at.

◆ get_sites() [1/2]

void Halide::Internal::Autoscheduler::LoopNest::get_sites	(	StageMap< Sites > &	sites,
		const LoopNest *	task = nullptr,
		const LoopNest *	parent = nullptr ) const

◆ set_working_set_at_task_feature() [1/2]

void Halide::Internal::Autoscheduler::LoopNest::set_working_set_at_task_feature	(	int64_t	working_set,
		StageMap< ScheduleFeatures > *	features ) const

inline

Definition at line 120 of file LoopNest.h.

References children, and features.

◆ compute_features() [1/2]

void Halide::Internal::Autoscheduler::LoopNest::compute_features	(	const FunctionDAG &	dag,
		const Adams2019Params &	params,
		const StageMap< Sites > &	sites,
		int64_t	instances,
		int64_t	parallelism,
		const LoopNest *	parent,
		const LoopNest *	grandparent,
		const LoopNest &	root,
		int64_t *	working_set,
		StageMap< ScheduleFeatures > *	features,
		bool	use_cached_features ) const

References features.

◆ is_root() [1/2]

bool Halide::Internal::Autoscheduler::LoopNest::is_root ( ) const

inline

Definition at line 141 of file LoopNest.h.

References node.

◆ set_bounds() [1/2]

const Bound & Halide::Internal::Autoscheduler::LoopNest::set_bounds	(	const FunctionDAG::Node *	f,
		BoundContents *	b ) const

inline

Definition at line 148 of file LoopNest.h.

References bounds.

◆ get_bounds() [1/2]

const Bound & Halide::Internal::Autoscheduler::LoopNest::get_bounds ( const FunctionDAG::Node * f ) const

◆ dump() [1/3]

void Halide::Internal::Autoscheduler::LoopNest::dump	(	std::ostream &	os,
		string	prefix,
		const LoopNest *	parent ) const

◆ calls() [1/2]

bool Halide::Internal::Autoscheduler::LoopNest::calls ( const FunctionDAG::Node * f ) const

◆ max_inlined_calls() [1/2]

int64_t Halide::Internal::Autoscheduler::LoopNest::max_inlined_calls ( ) const

◆ accesses_input_buffer() [1/2]

bool Halide::Internal::Autoscheduler::LoopNest::accesses_input_buffer ( ) const

◆ computes() [1/2]

bool Halide::Internal::Autoscheduler::LoopNest::computes ( const FunctionDAG::Node * f ) const

◆ inline_func() [1/2]

void Halide::Internal::Autoscheduler::LoopNest::inline_func ( const FunctionDAG::Node * f )

◆ compute_here() [1/2]

void Halide::Internal::Autoscheduler::LoopNest::compute_here	(	const FunctionDAG::Node *	f,
		bool	tileable,
		int	v,
		const Adams2019Params &	params )

References tileable.

◆ parallelize_in_tiles() [1/2]

IntrusivePtr< const LoopNest > Halide::Internal::Autoscheduler::LoopNest::parallelize_in_tiles	(	const Adams2019Params &	params,
		const vector< int64_t > &	tiling,
		const LoopNest *	parent ) const

◆ compute_in_tiles() [1/2]

std::vector< IntrusivePtr< const LoopNest > > Halide::Internal::Autoscheduler::LoopNest::compute_in_tiles	(	const FunctionDAG::Node *	f,
		const LoopNest *	parent,
		const Adams2019Params &	params,
		int	v,
		bool	in_realization ) const

◆ apply() [1/2]

void Halide::Internal::Autoscheduler::LoopNest::apply	(	LoopLevel	here,
		StageMap< std::unique_ptr< StageScheduleState > > &	state_map,
		double	num_cores,
		int	depth,
		const LoopNest *	parent,
		const LoopNest *	compute_site ) const

◆ copy_from_including_features() [1/2]

void Halide::Internal::Autoscheduler::LoopNest::copy_from_including_features ( const LoopNest & n )

◆ memoize_points_computed_minimum() [1/2]

void Halide::Internal::Autoscheduler::LoopNest::memoize_points_computed_minimum	(	StageMap< ScheduleFeatures > &	memoized_features,
		const StageMap< ScheduleFeatures > *	features ) const

References features.

◆ memoize_features() [1/2]

void Halide::Internal::Autoscheduler::LoopNest::memoize_features	(	StageMap< ScheduleFeatures > &	memoized_features,
		const StageMap< ScheduleFeatures > *	features_to_insert ) const

◆ compute_working_set_from_features() [1/2]

void Halide::Internal::Autoscheduler::LoopNest::compute_working_set_from_features	(	int64_t *	working_set,
		const StageMap< ScheduleFeatures > *	features ) const

References features.

◆ recompute_inlined_features() [1/2]

void Halide::Internal::Autoscheduler::LoopNest::recompute_inlined_features	(	const StageMap< Sites > &	sites,
		StageMap< ScheduleFeatures > *	features ) const

References features.

◆ compute_hash_of_producers_stored_at_root() [1/2]

uint64_t Halide::Internal::Autoscheduler::LoopNest::compute_hash_of_producers_stored_at_root ( const StageMap< Sites > & sites ) const

◆ collect_producers() [1/2]

std::vector< std::pair< int, int > > Halide::Internal::Autoscheduler::LoopNest::collect_producers ( const StageMap< Sites > & sites ) const

◆ collect_stages() [1/2]

void Halide::Internal::Autoscheduler::LoopNest::collect_stages ( std::set< const FunctionDAG::Node::Stage * > & stages ) const

◆ is_gpu_serial()

bool Halide::Internal::Autoscheduler::LoopNest::is_gpu_serial ( const Target & target ) const

inline

Definition at line 148 of file LoopNest.h.

References gpu_label, Halide::Target::has_gpu_feature(), and Halide::Internal::Autoscheduler::Serial.

◆ is_gpu_thread()

bool Halide::Internal::Autoscheduler::LoopNest::is_gpu_thread ( const Target & target ) const

inline

Definition at line 152 of file LoopNest.h.

References gpu_label, Halide::Target::has_gpu_feature(), and Halide::Internal::Autoscheduler::Thread.

◆ is_gpu_block()

bool Halide::Internal::Autoscheduler::LoopNest::is_gpu_block ( const Target & target ) const

inline

Definition at line 156 of file LoopNest.h.

References Halide::Internal::Autoscheduler::Block, gpu_label, and Halide::Target::has_gpu_feature().

◆ is_scalar()

bool Halide::Internal::Autoscheduler::LoopNest::is_scalar ( ) const

inline

Definition at line 160 of file LoopNest.h.

References size.

◆ get_union_thread_counts()

vector< int64_t > Halide::Internal::Autoscheduler::LoopNest::get_union_thread_counts ( const FunctionDAG::Node * f ) const

◆ get_stage_sizes()

void Halide::Internal::Autoscheduler::LoopNest::get_stage_sizes	(	const FunctionDAG::Node *	f,
		vector< vector< int64_t > > &	stage_sizes,
		vector< vector< int > > &	pure_dims,
		vector< int > &	vectorized_indices ) const

◆ generate_vec_dim_serial_tilings()

void Halide::Internal::Autoscheduler::LoopNest::generate_vec_dim_serial_tilings ( vector< int > & serial_sizes ) const

◆ add_gpu_thread_tilings()

bool Halide::Internal::Autoscheduler::LoopNest::add_gpu_thread_tilings	(	const FunctionDAG::Node *	f,
		const Anderson2021Params &	params,
		const Target &	target,
		int	v,
		vector< IntrusivePtr< const LoopNest > > &	result,
		const vector< int64_t > &	max_size )

◆ copy_from() [2/2]

void Halide::Internal::Autoscheduler::LoopNest::copy_from ( const LoopNest & n )

◆ copy_from_including_features() [2/2]

void Halide::Internal::Autoscheduler::LoopNest::copy_from_including_features ( const LoopNest & n )

◆ hash_combine() [2/2]

void Halide::Internal::Autoscheduler::LoopNest::hash_combine	(	uint64_t &	h,
		uint64_t	next )

inline static

Definition at line 193 of file LoopNest.h.

◆ structural_hash() [2/2]

void Halide::Internal::Autoscheduler::LoopNest::structural_hash	(	uint64_t &	h,
		int	depth ) const

◆ funcs_realized_or_inlined() [2/2]

size_t Halide::Internal::Autoscheduler::LoopNest::funcs_realized_or_inlined ( ) const

inline

Definition at line 205 of file LoopNest.h.

References children, inlined, and store_at.

◆ get_gpu_memory_type()

GPUMemoryType Halide::Internal::Autoscheduler::LoopNest::get_gpu_memory_type	(	bool	in_block,
		bool	in_thread,
		bool	is_inlined = false ) const

◆ unrolled_loops()

std::vector< int > Halide::Internal::Autoscheduler::LoopNest::unrolled_loops	(	const Target &	target,
		const LoopNest *	parent,
		const LoopNest *	grandparent ) const

◆ get_allocs_that_can_be_promoted_to_registers()

void Halide::Internal::Autoscheduler::LoopNest::get_allocs_that_can_be_promoted_to_registers	(	const Target &	target,
		StageMap< Sites > &	sites,
		NodeMap< bool > &	can_be_promoted_to_registers,
		const LoopNest *	grandparent,
		const LoopNest *	parent ) const

◆ promote_allocs_to_registers()

bool Halide::Internal::Autoscheduler::LoopNest::promote_allocs_to_registers	(	const Target &	target,
		StageMap< Sites > &	sites ) const

◆ get_sites() [2/2]

void Halide::Internal::Autoscheduler::LoopNest::get_sites	(	const Target &	target,
		StageMap< Sites > &	sites,
		StageMap< int64_t > &	shared_mem_alloc_sizes,
		const LoopNest *	task = nullptr,
		const LoopNest *	parent = nullptr,
		const LoopNest *	current_thread_loop = nullptr ) const

◆ set_working_set_at_task_feature() [2/2]

void Halide::Internal::Autoscheduler::LoopNest::set_working_set_at_task_feature	(	int64_t	working_set,
		StageMap< ScheduleFeatures > *	features ) const

inline

Definition at line 271 of file LoopNest.h.

References children, and features.

◆ exceeds_serial_extents_limit()

bool Halide::Internal::Autoscheduler::LoopNest::exceeds_serial_extents_limit	(	const Target &	target,
		const LoopNest *	parent,
		bool	in_threads_loop ) const

◆ node_has_dynamic_region_computed()

bool Halide::Internal::Autoscheduler::LoopNest::node_has_dynamic_region_computed ( const FunctionDAG::Node * f ) const

◆ has_dynamic_allocation_inside_thread()

bool Halide::Internal::Autoscheduler::LoopNest::has_dynamic_allocation_inside_thread ( bool in_thread_loop ) const

◆ find_pure_stage_loop_nest()

const LoopNest * Halide::Internal::Autoscheduler::LoopNest::find_pure_stage_loop_nest ( const FunctionDAG::Node * node ) const

References node.

◆ get_pure_stage_vectorized_loop_index()

int Halide::Internal::Autoscheduler::LoopNest::get_pure_stage_vectorized_loop_index ( const FunctionDAG::Node * node ) const

References node.

◆ get_vectorized_loop_index_from_pure_stage()

int Halide::Internal::Autoscheduler::LoopNest::get_vectorized_loop_index_from_pure_stage ( const LoopNest & root ) const

◆ storage_stride()

double Halide::Internal::Autoscheduler::LoopNest::storage_stride	(	const LoadJacobian &	jac,
		int	innermost_storage_dim,
		const FunctionDAG::Node *	storage_node,
		const Bound &	store_bounds,
		const LoopNest &	root ) const

◆ compute_strides()

Strides Halide::Internal::Autoscheduler::LoopNest::compute_strides	(	const LoadJacobian &	jac,
		int	innermost_storage_dim,
		const FunctionDAG::Node *	storage_node,
		const Bound &	store_bounds,
		const ThreadInfo *	thread_info,
		bool	verbose = false ) const

◆ all_strides_exist()

bool Halide::Internal::Autoscheduler::LoopNest::all_strides_exist	(	const LoadJacobian &	jac,
		const FunctionDAG::Node *	storage_node,
		const LoopNest &	root ) const

◆ get_actual_vector_dim()

int Halide::Internal::Autoscheduler::LoopNest::get_actual_vector_dim ( const Bound & store_bounds ) const

◆ compute_gpu_store_features()

void Halide::Internal::Autoscheduler::LoopNest::compute_gpu_store_features	(	const LoadJacobian &	jac,
		int	consumer_innermost_dim,
		const FunctionDAG::Node *	node,
		const Bound &	consumer_store_bounds,
		const GPULoopInfo &	gpu_loop_info,
		const std::vector< int64_t > &	inner_serial_loop_extents,
		const Sites &	consumer_site,
		ScheduleFeatures &	feat,
		const LoopNest *	parent,
		const LoopNest &	root,
		GlobalMemInfo &	global_mem_loads,
		SharedMemInfo &	shared_mem_loads,
		LocalMemInfo &	local_mem_loads,
		bool	verbose = false ) const

References node.

◆ can_vectorize_access_for_innermost_dim()

bool Halide::Internal::Autoscheduler::LoopNest::can_vectorize_access_for_innermost_dim	(	const LoadJacobian &	jac,
		const FunctionDAG::Node *	accessed,
		int	innermost_dim,
		int	loop_index ) const

◆ can_vectorize_store_access()

bool Halide::Internal::Autoscheduler::LoopNest::can_vectorize_store_access	(	const LoadJacobian &	jac,
		const FunctionDAG::Node *	accessed,
		bool	accessed_has_been_scheduled,
		int	innermost_dim,
		int	loop_index,
		const GPUMemoryType &	mem_type ) const

◆ vectorized_load_access_size()

int Halide::Internal::Autoscheduler::LoopNest::vectorized_load_access_size	(	const LoadJacobian &	jac,
		const FunctionDAG::Node *	accessed,
		bool	accessed_has_been_scheduled,
		int	innermost_dim,
		const GPUMemoryType &	mem_type,
		bool	verbose = false ) const

◆ vectorized_access_size()

int Halide::Internal::Autoscheduler::LoopNest::vectorized_access_size	(	size_t	loop_index,
		bool	verbose = false ) const

◆ compute_num_mem_accesses_per_block()

template<typename T>

void Halide::Internal::Autoscheduler::LoopNest::compute_num_mem_accesses_per_block	(	const LoadJacobian &	jac,
		const FunctionDAG::Node *	node,
		const Bound &	store_bounds,
		const ThreadInfo *	thread_info,
		int	innermost_dim,
		double	num_requests_per_warp,
		MemInfoType< T > &	mem_info,
		bool	verbose = false ) const

References node.

◆ compute_local_mem_store_features()

std::pair< double, double > Halide::Internal::Autoscheduler::LoopNest::compute_local_mem_store_features	(	const LoadJacobian &	jac,
		int	consumer_innermost_dim,
		const FunctionDAG::Node *	node,
		const Bound &	consumer_store_bounds,
		const LoopNest &	root,
		double	serial_loop_extents ) const

References node.

◆ compute_mem_store_info()

template<typename T>

MemInfoType< T > Halide::Internal::Autoscheduler::LoopNest::compute_mem_store_info	(	const LoadJacobian &	jac,
		int	consumer_innermost_dim,
		const FunctionDAG::Node *	node,
		const Bound &	consumer_store_bounds,
		const ThreadInfo *	thread_info,
		double	serial_loop_extents,
		bool	verbose ) const

References node.

◆ compute_mem_load_features()

template<typename T>

void Halide::Internal::Autoscheduler::LoopNest::compute_mem_load_features	(	const LoadJacobian &	jac,
		int	producer_innermost_dim,
		const FunctionDAG::Node *	node,
		const Bound &	producer_store_bounds,
		bool	producer_has_been_scheduled,
		const ThreadInfo *	thread_info,
		MemInfoType< T > &	mem_info,
		double	serial_loop_extents,
		bool	verbose = false ) const

References node.

◆ compute_local_mem_stride()

double Halide::Internal::Autoscheduler::LoopNest::compute_local_mem_stride	(	double	stride,
		double	bytes ) const

◆ get_enclosing_block()

const LoopNest * Halide::Internal::Autoscheduler::LoopNest::get_enclosing_block	(	const LoopNest *	parent,
		const LoopNest *	grandparent ) const

◆ get_block_and_serial_extents()

std::pair< int64_t, int64_t > Halide::Internal::Autoscheduler::LoopNest::get_block_and_serial_extents ( const LoopNest * block ) const

◆ all_paths_to_leaves_have_thread_loop()

bool Halide::Internal::Autoscheduler::LoopNest::all_paths_to_leaves_have_thread_loop ( ) const

◆ has_thread_loop_descendant()

bool Halide::Internal::Autoscheduler::LoopNest::has_thread_loop_descendant ( ) const

◆ compute_warp_features()

void Halide::Internal::Autoscheduler::LoopNest::compute_warp_features	(	ScheduleFeatures &	features,
		const GPULoopInfo &	gpu_loop_info ) const

References features.

◆ compute_warp_and_block_occupancy()

void Halide::Internal::Autoscheduler::LoopNest::compute_warp_and_block_occupancy	(	const Anderson2021Params &	params,
		ScheduleFeatures &	feat,
		const GPULoopInfo &	gpu_loop_info ) const

◆ compute_shared_mem_occupancy()

void Halide::Internal::Autoscheduler::LoopNest::compute_shared_mem_occupancy	(	const Anderson2021Params &	params,
		const Target &	target,
		int64_t	total_shared_mem_alloc_size,
		ScheduleFeatures &	feat ) const

◆ find_innermost_and_parent()

std::pair< const LoopNest *, const LoopNest * > Halide::Internal::Autoscheduler::LoopNest::find_innermost_and_parent ( ) const

◆ points_accessed_per_thread()

int64_t Halide::Internal::Autoscheduler::LoopNest::points_accessed_per_thread	(	const Anderson2021Params &	params,
		const Target &	target,
		const GPULoopInfo &	gpu_loop_info,
		const std::vector< const FunctionDAG::Edge * > &	edge_chain,
		const LoadJacobian &	jac,
		const LoopNest *	parent,
		const LoopNest *	grandparent,
		int64_t	n,
		const ScheduleFeatures &	feat,
		const LoadJacobian &	serial_jac,
		bool	producer_has_been_scheduled,
		int	producer_innermost_dim,
		const GPUMemoryType &	mem_type,
		bool	verbose ) const

◆ compute_licm_amortization()

int64_t Halide::Internal::Autoscheduler::LoopNest::compute_licm_amortization	(	const LoopNest *	innermost,
		const LoopNest *	parent,
		const ScheduleFeatures &	feat,
		const LoadJacobian &	jac,
		int	producer_dims ) const

References innermost.

◆ memoize_points_computed_minimum() [2/2]

void Halide::Internal::Autoscheduler::LoopNest::memoize_points_computed_minimum	(	StageMap< ScheduleFeatures > &	memoized_features,
		const StageMap< ScheduleFeatures > *	features ) const

References features.

◆ collect_producers() [2/2]

vector< pair< int, int > > Halide::Internal::Autoscheduler::LoopNest::collect_producers ( const StageMap< Sites > & sites ) const

◆ compute_hash_of_producers_stored_at_root() [2/2]

uint64_t Halide::Internal::Autoscheduler::LoopNest::compute_hash_of_producers_stored_at_root ( const StageMap< Sites > & sites ) const

◆ collect_stages() [2/2]

void Halide::Internal::Autoscheduler::LoopNest::collect_stages ( std::set< const FunctionDAG::Node::Stage * > & stages ) const

◆ memoize_features() [2/2]

void Halide::Internal::Autoscheduler::LoopNest::memoize_features	(	StageMap< ScheduleFeatures > &	memoized_features,
		const StageMap< ScheduleFeatures > *	features ) const

References features.

◆ compute_working_set_from_features() [2/2]

void Halide::Internal::Autoscheduler::LoopNest::compute_working_set_from_features	(	int64_t *	working_set,
		const StageMap< ScheduleFeatures > *	features ) const

References features.

◆ recompute_inlined_features() [2/2]

void Halide::Internal::Autoscheduler::LoopNest::recompute_inlined_features	(	const StageMap< Sites > &	sites,
		StageMap< ScheduleFeatures > *	features ) const

References features.

◆ compute_alloc_size_of_node_here()

std::pair< int64_t, bool > Halide::Internal::Autoscheduler::LoopNest::compute_alloc_size_of_node_here ( const FunctionDAG::Node * f ) const

◆ compute_features() [2/2]

void Halide::Internal::Autoscheduler::LoopNest::compute_features (
    const FunctionDAG & dag,
    const Anderson2021Params & params,
    const Target & target,
    const StageMap< Sites > & sites,
    int64_t instances,
    int64_t parallelism,
    const LoopNest * parent,
    const LoopNest * grandparent,
    const LoopNest & root,
    GPULoopInfo gpu_loop_info,
    bool use_memoized_features,
    const StageMap< int64_t > & total_shared_mem_alloc_sizes,
    int64_t * working_set,
    int64_t * working_set_local_constant,
    int64_t * working_set_local_dynamic,
    StageMap< ScheduleFeatures > * features,
    Statistics & stats,
    bool verbose = false ) const

References features.

◆ is_root() [2/2]

bool Halide::Internal::Autoscheduler::LoopNest::is_root ( ) const

inline

Definition at line 477 of file LoopNest.h.

References node.

◆ set_bounds() [2/2]

const Bound & Halide::Internal::Autoscheduler::LoopNest::set_bounds	(	const FunctionDAG::Node *	f,
		BoundContents *	b ) const

inline

Definition at line 484 of file LoopNest.h.

References bounds.

◆ get_bounds() [2/2]

const Bound & Halide::Internal::Autoscheduler::LoopNest::get_bounds ( const FunctionDAG::Node * f ) const

◆ get_bounds_along_edge_chain()

Bound Halide::Internal::Autoscheduler::LoopNest::get_bounds_along_edge_chain	(	const FunctionDAG::Node *	f,
		const vector< const FunctionDAG::Edge * > &	edge_chain ) const

◆ dump() [2/3]

void Halide::Internal::Autoscheduler::LoopNest::dump ( ) const

◆ to_string()

std::string Halide::Internal::Autoscheduler::LoopNest::to_string ( ) const

◆ dump() [3/3]

template<typename T>

void Halide::Internal::Autoscheduler::LoopNest::dump (
    T & stream,
    string prefix,
    const LoopNest * parent ) const

◆ calls() [2/2]

bool Halide::Internal::Autoscheduler::LoopNest::calls ( const FunctionDAG::Node * f ) const

◆ max_inlined_calls() [2/2]

int64_t Halide::Internal::Autoscheduler::LoopNest::max_inlined_calls ( ) const

◆ accesses_input_buffer() [2/2]

bool Halide::Internal::Autoscheduler::LoopNest::accesses_input_buffer ( ) const

◆ computes() [2/2]

bool Halide::Internal::Autoscheduler::LoopNest::computes ( const FunctionDAG::Node * f ) const

◆ inline_func() [2/2]

void Halide::Internal::Autoscheduler::LoopNest::inline_func ( const FunctionDAG::Node * f )

◆ compute_here() [2/2]

bool Halide::Internal::Autoscheduler::LoopNest::compute_here (
    const FunctionDAG::Node * f,
    bool tileable,
    int v,
    bool in_threads_loop,
    const Anderson2021Params & params,
    const Target & target )

References tileable.

◆ parallelize_in_tiles() [2/2]

IntrusivePtr< const LoopNest > Halide::Internal::Autoscheduler::LoopNest::parallelize_in_tiles (
    const vector< int64_t > & tiling,
    const LoopNest * parent,
    const Anderson2021Params & params,
    const Target & target,
    bool inner_tiling,
    bool adjust_tiling,
    bool move_all_rvars_inward = true,
    const vector< int > & rvars_to_move_inward = {} ) const

◆ get_total_local_mem_alloc_size()

int64_t Halide::Internal::Autoscheduler::LoopNest::get_total_local_mem_alloc_size	(	bool	constant_allocs_only = false,
		bool	in_threads_loop = false ) const

◆ get_total_constant_local_mem_alloc_size()

int64_t Halide::Internal::Autoscheduler::LoopNest::get_total_constant_local_mem_alloc_size ( ) const

◆ requires_dynamic_allocation()

bool Halide::Internal::Autoscheduler::LoopNest::requires_dynamic_allocation (
    const FunctionDAG::Node * f,
    const Target & target,
    bool in_threads_loop ) const

◆ compute_in_tiles() [2/2]

vector< IntrusivePtr< const LoopNest > > Halide::Internal::Autoscheduler::LoopNest::compute_in_tiles (
    const FunctionDAG::Node * f,
    const LoopNest * parent,
    const Anderson2021Params & params,
    const Target & target,
    const SearchSpaceOptions & search_space_options,
    int v,
    bool in_realization,
    bool in_threads_loop,
    bool is_pre_pass,
    vector< int64_t > union_counts = vector< int64_t >() ) const

References parallel, vector_dim, and vectorized_loop_index.

◆ has_constant_region_computed()

bool Halide::Internal::Autoscheduler::LoopNest::has_constant_region_computed ( const FunctionDAG::Node * node ) const

References node.

◆ has_constant_region_required()

bool Halide::Internal::Autoscheduler::LoopNest::has_constant_region_required ( const FunctionDAG::Node * node ) const

References node.

◆ other_stage_has_same_producer()

bool Halide::Internal::Autoscheduler::LoopNest::other_stage_has_same_producer ( const FunctionDAG::Node * producer ) const

◆ num_serial_loops() [1/2]

int Halide::Internal::Autoscheduler::LoopNest::num_serial_loops ( const FunctionDAG::Node::Stage * stage ) const

References stage.

◆ num_serial_loops() [2/2]

int Halide::Internal::Autoscheduler::LoopNest::num_serial_loops ( ) const

◆ producer_computed_here_or_further_in()

bool Halide::Internal::Autoscheduler::LoopNest::producer_computed_here_or_further_in ( const FunctionDAG::Node * producer ) const

◆ update_producers_to_be_staged()

void Halide::Internal::Autoscheduler::LoopNest::update_producers_to_be_staged	(	StageScheduleState &	state,
		const NodeMap< bool > &	all_inlined ) const

◆ region_computed_shrinks()

bool Halide::Internal::Autoscheduler::LoopNest::region_computed_shrinks	(	const FunctionDAG::Node *	f,
		const LoopNest *	parent ) const

◆ apply() [2/2]

void Halide::Internal::Autoscheduler::LoopNest::apply (
    LoopLevel here,
    StageMap< std::unique_ptr< StageScheduleState > > & state_map,
    double num_cores,
    int depth,
    const LoopNest * parent,
    const LoopNest * compute_site,
    const Target & target,
    std::vector< StageScheduleState * > & ancestors,
    const NodeMap< bool > & all_inlined ) const

◆ max_idle_lane_wastage()

double Halide::Internal::Autoscheduler::LoopNest::max_idle_lane_wastage	(	const Target &	target,
		GPULoopInfo	gpu_loop_info ) const

◆ has_valid_thread_extents()

bool Halide::Internal::Autoscheduler::LoopNest::has_valid_thread_extents ( ) const

◆ collect_nodes_that_should_be_inlined()

void Halide::Internal::Autoscheduler::LoopNest::collect_nodes_that_should_be_inlined	(	const NodeMap< bool > &	nodes_to_freeze,
		NodeMap< bool > &	inlined_nodes ) const

◆ collect_all_inlined()

void Halide::Internal::Autoscheduler::LoopNest::collect_all_inlined ( NodeMap< bool > & all_inlined ) const

◆ product_of_self_and_descendants()

int64_t Halide::Internal::Autoscheduler::LoopNest::product_of_self_and_descendants ( int loop_index ) const

◆ product_of_descendants()

int64_t Halide::Internal::Autoscheduler::LoopNest::product_of_descendants ( int loop_index ) const

◆ get_stages_computed_in_each_compute_root_loop()

void Halide::Internal::Autoscheduler::LoopNest::get_stages_computed_in_each_compute_root_loop	(	StageMap< StageMap< bool > > &	descendants,
		const LoopNest *	compute_root_loop_nest = nullptr ) const

Member Data Documentation

◆ ref_count

RefCount Halide::Internal::Autoscheduler::LoopNest::ref_count

mutable

Definition at line 35 of file LoopNest.h.

◆ size [1/2]

std::vector<int64_t> Halide::Internal::Autoscheduler::LoopNest::size

Definition at line 39 of file LoopNest.h.

Referenced by is_scalar().

◆ children [1/2]

std::vector<IntrusivePtr<const LoopNest> > Halide::Internal::Autoscheduler::LoopNest::children

Definition at line 42 of file LoopNest.h.

Referenced by Halide::Internal::Autoscheduler::deep_copy_loop_nest(), funcs_realized_or_inlined(), and set_working_set_at_task_feature().

◆ inlined

NodeMap< int64_t > Halide::Internal::Autoscheduler::LoopNest::inlined

Definition at line 46 of file LoopNest.h.

Referenced by funcs_realized_or_inlined().

◆ store_at

std::set< const FunctionDAG::Node * > Halide::Internal::Autoscheduler::LoopNest::store_at

Definition at line 49 of file LoopNest.h.

Referenced by funcs_realized_or_inlined().

◆ bounds

NodeMap< Bound > Halide::Internal::Autoscheduler::LoopNest::bounds

mutable

Definition at line 54 of file LoopNest.h.

Referenced by set_bounds().

◆ node

const FunctionDAG::Node * Halide::Internal::Autoscheduler::LoopNest::node = nullptr

Definition at line 57 of file LoopNest.h.

Referenced by compute_gpu_store_features(), compute_local_mem_store_features(), compute_mem_load_features(), compute_mem_store_info(), compute_num_mem_accesses_per_block(), find_pure_stage_loop_nest(), get_pure_stage_vectorized_loop_index(), has_constant_region_computed(), has_constant_region_required(), and is_root().

◆ stage

const FunctionDAG::Node::Stage * Halide::Internal::Autoscheduler::LoopNest::stage = nullptr

Definition at line 60 of file LoopNest.h.

Referenced by num_serial_loops().

◆ innermost

bool Halide::Internal::Autoscheduler::LoopNest::innermost = false

Definition at line 63 of file LoopNest.h.

Referenced by compute_licm_amortization().

◆ tileable

bool Halide::Internal::Autoscheduler::LoopNest::tileable = false

Definition at line 66 of file LoopNest.h.

Referenced by compute_here() [1/2] and compute_here() [2/2].

◆ parallel

bool Halide::Internal::Autoscheduler::LoopNest::parallel = false

Definition at line 69 of file LoopNest.h.

Referenced by compute_in_tiles().

◆ vector_dim

int Halide::Internal::Autoscheduler::LoopNest::vector_dim = -1

Definition at line 72 of file LoopNest.h.

Referenced by compute_in_tiles().

◆ vectorized_loop_index

int Halide::Internal::Autoscheduler::LoopNest::vectorized_loop_index = -1

Definition at line 75 of file LoopNest.h.

Referenced by compute_in_tiles().

◆ feature_intermediates_cache

std::map<uint64_t, StageMap<StageMap<FeatureIntermediates> > > Halide::Internal::Autoscheduler::LoopNest::feature_intermediates_cache

mutable

Definition at line 265 of file LoopNest.h.

◆ features_cache

std::map<uint64_t, StageMap<ScheduleFeatures> > Halide::Internal::Autoscheduler::LoopNest::features_cache

mutable

Definition at line 267 of file LoopNest.h.

◆ size [2/2]

vector<int64_t> Halide::Internal::Autoscheduler::LoopNest::size

Definition at line 92 of file LoopNest.h.

◆ children [2/2]

vector<IntrusivePtr<const LoopNest> > Halide::Internal::Autoscheduler::LoopNest::children

Definition at line 95 of file LoopNest.h.

◆ gpu_label

GPU_parallelism Halide::Internal::Autoscheduler::LoopNest::gpu_label = GPU_parallelism::None

mutable

Definition at line 131 of file LoopNest.h.

Referenced by is_gpu_block(), is_gpu_serial(), and is_gpu_thread().

◆ feature_intermediates

std::map<uint64_t, StageMap<StageMap<FeatureIntermediates> > > Halide::Internal::Autoscheduler::LoopNest::feature_intermediates

mutable

Definition at line 145 of file LoopNest.h.

◆ features

std::map<uint64_t, StageMap<ScheduleFeatures> > Halide::Internal::Autoscheduler::LoopNest::features

mutable

Definition at line 146 of file LoopNest.h.

Referenced by compute_features() [1/2], compute_features() [2/2], compute_warp_features(), compute_working_set_from_features(), memoize_features(), memoize_points_computed_minimum(), recompute_inlined_features(), and set_working_set_at_task_feature().

The documentation for this struct was generated from the following files:

src/autoschedulers/adams2019/LoopNest.h
src/autoschedulers/anderson2021/LoopNest.h

Classes

Public Member Functions

Static Public Member Functions

Public Attributes

Detailed Description

Member Function Documentation

◆ copy_from() [1/2]

◆ hash_combine() [1/2]

◆ structural_hash() [1/2]

◆ funcs_realized_or_inlined() [1/2]

◆ get_sites() [1/2]

◆ set_working_set_at_task_feature() [1/2]

◆ compute_features() [1/2]

◆ is_root() [1/2]

◆ set_bounds() [1/2]

◆ get_bounds() [1/2]

◆ dump() [1/3]

◆ calls() [1/2]

◆ max_inlined_calls() [1/2]

◆ accesses_input_buffer() [1/2]

◆ computes() [1/2]

◆ inline_func() [1/2]

◆ compute_here() [1/2]

◆ parallelize_in_tiles() [1/2]

◆ compute_in_tiles() [1/2]

◆ apply() [1/2]

◆ copy_from_including_features() [1/2]

◆ memoize_points_computed_minimum() [1/2]

◆ memoize_features() [1/2]

◆ compute_working_set_from_features() [1/2]

◆ recompute_inlined_features() [1/2]

◆ compute_hash_of_producers_stored_at_root() [1/2]

◆ collect_producers() [1/2]

◆ collect_stages() [1/2]

◆ is_gpu_serial()

◆ is_gpu_thread()

◆ is_gpu_block()

◆ is_scalar()

◆ get_union_thread_counts()

◆ get_stage_sizes()

◆ generate_vec_dim_serial_tilings()

◆ add_gpu_thread_tilings()

◆ copy_from() [2/2]

◆ copy_from_including_features() [2/2]

◆ hash_combine() [2/2]

◆ structural_hash() [2/2]

◆ funcs_realized_or_inlined() [2/2]

◆ get_gpu_memory_type()

◆ unrolled_loops()

◆ get_allocs_that_can_be_promoted_to_registers()

◆ promote_allocs_to_registers()

◆ get_sites() [2/2]

◆ set_working_set_at_task_feature() [2/2]

◆ exceeds_serial_extents_limit()

◆ node_has_dynamic_region_computed()

◆ has_dynamic_allocation_inside_thread()

◆ find_pure_stage_loop_nest()

◆ get_pure_stage_vectorized_loop_index()

◆ get_vectorized_loop_index_from_pure_stage()

◆ storage_stride()

◆ compute_strides()

◆ all_strides_exist()

◆ get_actual_vector_dim()

◆ compute_gpu_store_features()

◆ can_vectorize_access_for_innermost_dim()

◆ can_vectorize_store_access()

◆ vectorized_load_access_size()

◆ vectorized_access_size()

◆ compute_num_mem_accesses_per_block()

◆ compute_local_mem_store_features()

◆ compute_mem_store_info()

◆ compute_mem_load_features()

◆ compute_local_mem_stride()

◆ get_enclosing_block()

◆ get_block_and_serial_extents()

◆ all_paths_to_leaves_have_thread_loop()

◆ has_thread_loop_descendant()

◆ compute_warp_features()

◆ compute_warp_and_block_occupancy()

◆ compute_shared_mem_occupancy()