Halide
Halide::Internal::Autoscheduler Namespace Reference

Classes

struct  Adams2019Params
 
struct  Anderson2021Params
 
struct  BoundContents
 
struct  Cache
 
struct  CachingOptions
 
class  ExprBranching
 
struct  Filter
 
struct  FunctionDAG
 
struct  GlobalAccessAccumulator
 
struct  GPULoopInfo
 
class  LoadJacobian
 
struct  LocalAccessAccumulator
 
struct  LoopNest
 
class  LoopNestParser
 
struct  MemInfo
 
struct  MemTraits
 
struct  MemTraits< GlobalMem >
 
struct  MemTraits< LocalMem >
 
struct  MemTraits< SharedMem >
 
struct  NoOpMutator
 
struct  OptionalRational
 
class  ParamParser
 
struct  ScopedStatistic
 
struct  ScopedTimer
 
struct  SearchSpace
 
struct  SearchSpaceOptions
 
struct  SharedAccessAccumulator
 
class  Span
 
struct  State
 
class  StateQueue
 
struct  Statistics
 
struct  Strides
 
struct  ThreadInfo
 
struct  ThreadTileOption
 
struct  Timer
 

Typedefs

typedef PerfectHashMap< FunctionDAG::Node::Stage, ScheduleFeatures > StageMapOfScheduleFeatures
 
using BlockCache = NodeMap< std::map< int, std::vector< IntrusivePtr< const LoopNest > >> >
 
using Bound = IntrusivePtr< const BoundContents >
 
template<typename T >
using NodeMap = PerfectHashMap< FunctionDAG::Node, T >
 
template<typename T >
using StageMap = PerfectHashMap< FunctionDAG::Node::Stage, T >
 
using Clock = std::chrono::high_resolution_clock
 
template<typename T >
using Accumulator = typename MemTraits< T >::Accumulator
 
template<typename T >
using MemInfoType = MemInfo< typename MemTraits< T >::MemInfoType >
 
using GlobalMemInfo = MemInfoType< GlobalMem >
 
using SharedMemInfo = MemInfoType< SharedMem >
 
using LocalMemInfo = MemInfoType< LocalMem >
 

Enumerations

enum  GPU_parallelism {
  GPU_parallelism::Block, GPU_parallelism::Thread, GPU_parallelism::Serial, GPU_parallelism::Simd,
  GPU_parallelism::Parallelized, GPU_parallelism::None
}
 
enum  GPUMemoryType {
  GPUMemoryType::Global, GPUMemoryType::Shared, GPUMemoryType::Local, GPUMemoryType::Registers,
  GPUMemoryType::Inlined
}
 

Functions

void find_and_apply_schedule (FunctionDAG &dag, const std::vector< Function > &outputs, const Adams2019Params &params, CostModel *cost_model, int beam_size, StageMapOfScheduleFeatures *schedule_features)
 
std::vector< std::vector< int64_t > > generate_tilings (const vector< int64_t > &s, int d, int factor, bool allow_splits)
 
const LoopNest * deepest_common_ancestor (const std::map< const LoopNest *, std::pair< const LoopNest *, int >> &parents, const LoopNest *a, const LoopNest *b)
 
void compute_loop_nest_parents (std::map< const LoopNest *, std::pair< const LoopNest *, int >> &parents, const LoopNest *here, int depth)
 
void find_and_apply_schedule (FunctionDAG &dag, const std::vector< Function > &outputs, const Anderson2021Params &params, const Target &target, CostModel *cost_model, int beam_size, StageMapOfScheduleFeatures *schedule_features)
 
void sanitize_names (std::string &str)
 
std::string stringify (GPU_parallelism label)
 
bool may_subtile (const Anderson2021Params &params)
 
int64_t get_shared_memory_limit (const Anderson2021Params &params)
 
int64_t get_active_block_hardware_limit (const Anderson2021Params &params)
 
int64_t get_active_warp_hardware_limit (const Anderson2021Params &params)
 
constexpr int64_t get_register_mem_alloc_limit ()
 
int get_unroll_limit (const Target &target)
 
bool in_range_zero_one (double x)
 
bool are_valid_thread_extents (const vector< int64_t > &counts)
 
double get_idle_lane_wastage_limit_env_var ()
 
double get_idle_lane_wastage_limit ()
 
bool all (const vector< int > &v)
 
bool accessed_at_constant_indices (const std::vector< int > &unrolled, const FunctionDAG::Edge *e)
 
bool verify_memoized_features ()
 
bool is_memoize_blocks_enabled ()
 
double get_stack_memory_adjustment_factor ()
 
int64_t get_stack_memory_limit ()
 
bool use_adjusted_tilings ()
 
bool compute_root_and_inline_only ()
 
template<typename PostCreateMutator >
void deep_copy_loop_nest (LoopNest *new_loop_nest, const LoopNest *new_loop_nest_parent, const IntrusivePtr< const LoopNest > &existing_loop_nest, const PostCreateMutator &post_create_mutator)
 
template<typename PostCreateMutator >
LoopNest * deep_copy_loop_nest (const IntrusivePtr< const LoopNest > &loop_nest, const PostCreateMutator &post_create_mutator)
 
template<typename A , typename B >
void expect_eq (int line, const A &expected, const B &actual)
 
template<typename A , typename B >
void approx_eq (int line, const A &expected, const B &actual, float epsilon)
 
template<typename A >
void expect (int line, const A &expected)
 
bool all_ones (const std::vector< int64_t > &nums)
 
bool equal_to_existing_size (const std::vector< int64_t > &s, const std::vector< int64_t > &nums)
 
std::vector< std::vector< int64_t > > generate_serial_tilings (const std::vector< int64_t > &s, int d, int last_d, int vectorized_index, const std::vector< int > &vec_dim_serial_sizes, bool filter_small_outer_extents=false, bool allow_inner_ones=false)
 
std::vector< std::vector< int64_t > > generate_tilings (const std::vector< int64_t > &s, int d, int factor, bool allow_splits, const std::vector< int > &inner_sizes=std::vector< int >())
 
void lowered_dims (const std::vector< int64_t > &size, int vector_loop_i, std::vector< int64_t > &lowered_size)
 moves vectorized dimension first and also removes dimensions with size 1 to reflect actual thread dimensions when loop nests are lowered More...
 
std::vector< std::vector< int64_t > > generate_gpu_tilings (const std::vector< std::vector< int64_t >> &stage_sizes, const std::vector< std::vector< int >> &pure_dims, const std::vector< int64_t > &max_s, int d, const std::vector< int > &vectorized_indices, bool serial_inner, bool is_compute_root_stage)
 

Variables

constexpr int kLocalMemoryLimit = 524288
 

Typedef Documentation

◆ StageMapOfScheduleFeatures

◆ BlockCache

using Halide::Internal::Autoscheduler::BlockCache = typedef NodeMap<std::map<int, std::vector<IntrusivePtr<const LoopNest> >> >

Definition at line 89 of file Cache.h.

◆ Bound

◆ NodeMap

Definition at line 21 of file LoopNest.h.

◆ StageMap

Definition at line 24 of file LoopNest.h.

◆ Clock

typedef std::chrono::high_resolution_clock Halide::Internal::Autoscheduler::Clock

Definition at line 15 of file Timer.h.

◆ Accumulator

template<typename T >
using Halide::Internal::Autoscheduler::Accumulator = typedef typename MemTraits<T>::Accumulator

Definition at line 53 of file GPUMemInfo.h.

◆ MemInfoType

template<typename T >
using Halide::Internal::Autoscheduler::MemInfoType = typedef MemInfo<typename MemTraits<T>::MemInfoType>

Definition at line 109 of file GPUMemInfo.h.

◆ GlobalMemInfo

Definition at line 111 of file GPUMemInfo.h.

◆ SharedMemInfo

Definition at line 112 of file GPUMemInfo.h.

◆ LocalMemInfo

Definition at line 113 of file GPUMemInfo.h.

Enumeration Type Documentation

◆ GPU_parallelism

Enumerator
Block 
Thread 
Serial 
Simd 
Parallelized 
None 

Definition at line 32 of file LoopNest.h.

◆ GPUMemoryType

Enumerator
Global 
Shared 
Local 
Registers 
Inlined 

Definition at line 42 of file LoopNest.h.

Function Documentation

◆ find_and_apply_schedule() [1/2]

void Halide::Internal::Autoscheduler::find_and_apply_schedule ( FunctionDAG &  dag,
const std::vector< Function > &  outputs,
const Adams2019Params &  params,
CostModel *  cost_model,
int  beam_size,
StageMapOfScheduleFeatures *  schedule_features 
)

◆ generate_tilings() [1/2]

std::vector<std::vector<int64_t> > Halide::Internal::Autoscheduler::generate_tilings ( const vector< int64_t > &  s,
int  d,
int  factor,
bool  allow_splits 
)

◆ deepest_common_ancestor()

const LoopNest* Halide::Internal::Autoscheduler::deepest_common_ancestor ( const std::map< const LoopNest *, std::pair< const LoopNest *, int >> &  parents,
const LoopNest *  a,
const LoopNest *  b 
)

◆ compute_loop_nest_parents()

void Halide::Internal::Autoscheduler::compute_loop_nest_parents ( std::map< const LoopNest *, std::pair< const LoopNest *, int >> &  parents,
const LoopNest *  here,
int  depth 
)

◆ find_and_apply_schedule() [2/2]

void Halide::Internal::Autoscheduler::find_and_apply_schedule ( FunctionDAG &  dag,
const std::vector< Function > &  outputs,
const Anderson2021Params &  params,
const Target &  target,
CostModel *  cost_model,
int  beam_size,
StageMapOfScheduleFeatures *  schedule_features 
)

◆ sanitize_names()

void Halide::Internal::Autoscheduler::sanitize_names ( std::string &  str)

◆ stringify()

std::string Halide::Internal::Autoscheduler::stringify ( GPU_parallelism  label)

◆ may_subtile()

bool Halide::Internal::Autoscheduler::may_subtile ( const Anderson2021Params &  params)

◆ get_shared_memory_limit()

int64_t Halide::Internal::Autoscheduler::get_shared_memory_limit ( const Anderson2021Params &  params)

◆ get_active_block_hardware_limit()

int64_t Halide::Internal::Autoscheduler::get_active_block_hardware_limit ( const Anderson2021Params &  params)

◆ get_active_warp_hardware_limit()

int64_t Halide::Internal::Autoscheduler::get_active_warp_hardware_limit ( const Anderson2021Params &  params)

◆ get_register_mem_alloc_limit()

constexpr int64_t Halide::Internal::Autoscheduler::get_register_mem_alloc_limit ( )
constexpr

Definition at line 56 of file LoopNest.h.

◆ get_unroll_limit()

int Halide::Internal::Autoscheduler::get_unroll_limit ( const Target &  target)

◆ in_range_zero_one()

bool Halide::Internal::Autoscheduler::in_range_zero_one ( double  x)

◆ are_valid_thread_extents()

bool Halide::Internal::Autoscheduler::are_valid_thread_extents ( const vector< int64_t > &  counts)

◆ get_idle_lane_wastage_limit_env_var()

double Halide::Internal::Autoscheduler::get_idle_lane_wastage_limit_env_var ( )

◆ get_idle_lane_wastage_limit()

double Halide::Internal::Autoscheduler::get_idle_lane_wastage_limit ( )

◆ all()

bool Halide::Internal::Autoscheduler::all ( const vector< int > &  v)

◆ accessed_at_constant_indices()

bool Halide::Internal::Autoscheduler::accessed_at_constant_indices ( const std::vector< int > &  unrolled,
const FunctionDAG::Edge *  e 
)

◆ verify_memoized_features()

bool Halide::Internal::Autoscheduler::verify_memoized_features ( )

◆ is_memoize_blocks_enabled()

bool Halide::Internal::Autoscheduler::is_memoize_blocks_enabled ( )

◆ get_stack_memory_adjustment_factor()

double Halide::Internal::Autoscheduler::get_stack_memory_adjustment_factor ( )

◆ get_stack_memory_limit()

int64_t Halide::Internal::Autoscheduler::get_stack_memory_limit ( )

◆ use_adjusted_tilings()

bool Halide::Internal::Autoscheduler::use_adjusted_tilings ( )

◆ compute_root_and_inline_only()

bool Halide::Internal::Autoscheduler::compute_root_and_inline_only ( )

◆ deep_copy_loop_nest() [1/2]

template<typename PostCreateMutator >
void Halide::Internal::Autoscheduler::deep_copy_loop_nest ( LoopNest *  new_loop_nest,
const LoopNest *  new_loop_nest_parent,
const IntrusivePtr< const LoopNest > &  existing_loop_nest,
const PostCreateMutator &  post_create_mutator 
)

◆ deep_copy_loop_nest() [2/2]

template<typename PostCreateMutator >
LoopNest* Halide::Internal::Autoscheduler::deep_copy_loop_nest ( const IntrusivePtr< const LoopNest > &  loop_nest,
const PostCreateMutator &  post_create_mutator 
)

Definition at line 63 of file State.h.

References deep_copy_loop_nest().

◆ expect_eq()

template<typename A , typename B >
void Halide::Internal::Autoscheduler::expect_eq ( int  line,
const A &  expected,
const B &  actual 
)

Definition at line 16 of file test.h.

References user_assert.

◆ approx_eq()

template<typename A , typename B >
void Halide::Internal::Autoscheduler::approx_eq ( int  line,
const A &  expected,
const B &  actual,
float  epsilon 
)

Definition at line 24 of file test.h.

References Halide::abs(), and user_assert.

◆ expect()

template<typename A >
void Halide::Internal::Autoscheduler::expect ( int  line,
const A &  expected 
)

Definition at line 32 of file test.h.

References user_assert.

◆ all_ones()

bool Halide::Internal::Autoscheduler::all_ones ( const std::vector< int64_t > &  nums)

◆ equal_to_existing_size()

bool Halide::Internal::Autoscheduler::equal_to_existing_size ( const std::vector< int64_t > &  s,
const std::vector< int64_t > &  nums 
)

◆ generate_serial_tilings()

std::vector<std::vector<int64_t> > Halide::Internal::Autoscheduler::generate_serial_tilings ( const std::vector< int64_t > &  s,
int  d,
int  last_d,
int  vectorized_index,
const std::vector< int > &  vec_dim_serial_sizes,
bool  filter_small_outer_extents = false,
bool  allow_inner_ones = false 
)

◆ generate_tilings() [2/2]

std::vector<std::vector<int64_t> > Halide::Internal::Autoscheduler::generate_tilings ( const std::vector< int64_t > &  s,
int  d,
int  factor,
bool  allow_splits,
const std::vector< int > &  inner_sizes = std::vector< int >() 
)

◆ lowered_dims()

void Halide::Internal::Autoscheduler::lowered_dims ( const std::vector< int64_t > &  size,
int  vector_loop_i,
std::vector< int64_t > &  lowered_size 
)

moves vectorized dimension first and also removes dimensions with size 1 to reflect actual thread dimensions when loop nests are lowered

◆ generate_gpu_tilings()

std::vector<std::vector<int64_t> > Halide::Internal::Autoscheduler::generate_gpu_tilings ( const std::vector< std::vector< int64_t >> &  stage_sizes,
const std::vector< std::vector< int >> &  pure_dims,
const std::vector< int64_t > &  max_s,
int  d,
const std::vector< int > &  vectorized_indices,
bool  serial_inner,
bool  is_compute_root_stage 
)

Variable Documentation

◆ kLocalMemoryLimit

constexpr int Halide::Internal::Autoscheduler::kLocalMemoryLimit = 524288
constexpr

Definition at line 32 of file State.h.