Halide 21.0.0
Halide compiler and libraries
Loading...
Searching...
No Matches
Halide::Internal::Autoscheduler Namespace Reference

Classes

struct  Adams2019Params
struct  Anderson2021Params
struct  BoundContents
struct  Cache
struct  CachingOptions
class  ExprBranching
struct  Filter
struct  FunctionDAG
struct  GlobalAccessAccumulator
struct  GPULoopInfo
class  LoadJacobian
struct  LocalAccessAccumulator
struct  LoopNest
class  LoopNestParser
class  map
 STL class. More...
struct  MemInfo
struct  MemTraits
struct  MemTraits< GlobalMem >
struct  MemTraits< LocalMem >
struct  MemTraits< SharedMem >
struct  NoOpMutator
struct  OptionalRational
class  pair
 STL class. More...
class  ParamParser
struct  ScopedStatistic
struct  ScopedTimer
struct  SearchSpace
struct  SearchSpaceOptions
class  set
 STL class. More...
struct  SharedAccessAccumulator
class  Span
struct  State
class  StateQueue
struct  Statistics
struct  Strides
class  string
 STL class. More...
struct  ThreadInfo
struct  ThreadTileOption
struct  Timer
class  unique_ptr
 STL class. More...
class  unordered_set
 STL class. More...
class  vector
 STL class. More...

Typedefs

typedef PerfectHashMap< FunctionDAG::Node::Stage, ScheduleFeatures > StageMapOfScheduleFeatures
using BlockCache = NodeMap<std::map<int, std::vector<IntrusivePtr<const LoopNest>>>>
using Bound = IntrusivePtr<const BoundContents>
template<typename T>
using NodeMap = PerfectHashMap<FunctionDAG::Node, T>
template<typename T>
using StageMap = PerfectHashMap<FunctionDAG::Node::Stage, T>
using Clock = std::chrono::high_resolution_clock
template<typename T>
using Accumulator = typename MemTraits<T>::Accumulator
template<typename T>
using MemInfoType = MemInfo<typename MemTraits<T>::MemInfoType>
using GlobalMemInfo = MemInfoType<GlobalMem>
using SharedMemInfo = MemInfoType<SharedMem>
using LocalMemInfo = MemInfoType<LocalMem>
using LoopNestMap = map<const LoopNest *, pair<const LoopNest *, int>>

Enumerations

enum class  GPU_parallelism {
  Block , Thread , Serial , Simd ,
  Parallelized , None
}
enum class  GPUMemoryType {
  Global , Shared , Local , Registers ,
  Inlined
}

Functions

void find_and_apply_schedule (FunctionDAG &dag, const std::vector< Function > &outputs, const Adams2019Params &params, CostModel *cost_model, int beam_size, StageMapOfScheduleFeatures *schedule_features)
std::vector< std::vector< int64_t > > generate_tilings (const vector< int64_t > &s, int d, int factor, bool allow_splits)
const LoopNest * deepest_common_ancestor (const std::map< const LoopNest *, std::pair< const LoopNest *, int > > &parents, const LoopNest *a, const LoopNest *b)
void compute_loop_nest_parents (std::map< const LoopNest *, std::pair< const LoopNest *, int > > &parents, const LoopNest *here, int depth)
void find_and_apply_schedule (FunctionDAG &dag, const std::vector< Function > &outputs, const Anderson2021Params &params, const Target &target, CostModel *cost_model, int beam_size, StageMapOfScheduleFeatures *schedule_features)
void sanitize_names (std::string &str)
std::string stringify (GPU_parallelism label)
bool may_subtile (const Anderson2021Params &params)
int64_t get_shared_memory_limit (const Anderson2021Params &params)
int64_t get_shared_memory_sm_limit (const Anderson2021Params &params)
int64_t get_active_block_hardware_limit (const Anderson2021Params &params)
int64_t get_active_warp_hardware_limit (const Anderson2021Params &params)
constexpr int64_t get_register_mem_alloc_limit ()
int get_unroll_limit (const Target &target)
bool in_range_zero_one (double x)
bool are_valid_thread_extents (const vector< int64_t > &counts)
double get_idle_lane_wastage_limit_env_var ()
double get_idle_lane_wastage_limit ()
bool all (const vector< int > &v)
bool accessed_at_constant_indices (const std::vector< int > &unrolled, const FunctionDAG::Edge *e)
bool verify_memoized_features ()
bool is_memoize_blocks_enabled ()
double get_stack_memory_adjustment_factor ()
int64_t get_stack_memory_limit ()
bool use_adjusted_tilings ()
bool compute_root_and_inline_only ()
template<typename PostCreateMutator>
void deep_copy_loop_nest (LoopNest *new_loop_nest, const LoopNest *new_loop_nest_parent, const IntrusivePtr< const LoopNest > &existing_loop_nest, const PostCreateMutator &post_create_mutator)
template<typename PostCreateMutator>
LoopNest * deep_copy_loop_nest (const IntrusivePtr< const LoopNest > &loop_nest, const PostCreateMutator &post_create_mutator)
template<typename A, typename B>
void expect_eq (int line, const A &expected, const B &actual)
template<typename A, typename B>
void approx_eq (int line, const A &expected, const B &actual, float epsilon)
template<typename A>
void expect (int line, const A &expected)
bool all_ones (const std::vector< int64_t > &nums)
bool equal_to_existing_size (const std::vector< int64_t > &s, const std::vector< int64_t > &nums)
std::vector< std::vector< int64_t > > generate_serial_tilings (const std::vector< int64_t > &s, int d, int last_d, int vectorized_index, const std::vector< int > &vec_dim_serial_sizes, bool filter_small_outer_extents=false, bool allow_inner_ones=false)
std::vector< std::vector< int64_t > > generate_tilings (const std::vector< int64_t > &s, int d, int factor, bool allow_splits, const std::vector< int > &inner_sizes=std::vector< int >())
void lowered_dims (const std::vector< int64_t > &size, int vector_loop_i, std::vector< int64_t > &lowered_size)
 moves vectorized dimension first and also removes dimensions with size 1 to reflect actual thread dimensions when loop nests are lowered
std::vector< std::vector< int64_t > > generate_gpu_tilings (const std::vector< std::vector< int64_t > > &stage_sizes, const std::vector< std::vector< int > > &pure_dims, const std::vector< int64_t > &max_s, int d, const std::vector< int > &vectorized_indices, bool serial_inner, bool is_compute_root_stage)

Variables

constexpr int kLocalMemoryLimit = 524288

Typedef Documentation

◆ StageMapOfScheduleFeatures

◆ BlockCache

using Halide::Internal::Autoscheduler::BlockCache = NodeMap<std::map<int, std::vector<IntrusivePtr<const LoopNest>>>>

Definition at line 89 of file Cache.h.

◆ Bound

◆ NodeMap

Definition at line 21 of file LoopNest.h.

◆ StageMap

Definition at line 24 of file LoopNest.h.

◆ Clock

using Halide::Internal::Autoscheduler::Clock = std::chrono::high_resolution_clock

Definition at line 15 of file Timer.h.

◆ Accumulator

template<typename T>
using Halide::Internal::Autoscheduler::Accumulator = typename MemTraits<T>::Accumulator

Definition at line 52 of file GPUMemInfo.h.

◆ MemInfoType

template<typename T>
using Halide::Internal::Autoscheduler::MemInfoType = MemInfo<typename MemTraits<T>::MemInfoType>

Definition at line 108 of file GPUMemInfo.h.

◆ GlobalMemInfo

Definition at line 110 of file GPUMemInfo.h.

◆ SharedMemInfo

Definition at line 111 of file GPUMemInfo.h.

◆ LocalMemInfo

Definition at line 112 of file GPUMemInfo.h.

◆ LoopNestMap

using Halide::Internal::Autoscheduler::LoopNestMap = map<const LoopNest *, pair<const LoopNest *, int>>

Definition at line 65 of file State.h.

Enumeration Type Documentation

◆ GPU_parallelism

Enumerator
Block 
Thread 
Serial 
Simd 
Parallelized 
None 

Definition at line 32 of file LoopNest.h.

◆ GPUMemoryType

Enumerator
Global 
Shared 
Local 
Registers 
Inlined 

Definition at line 44 of file LoopNest.h.

Function Documentation

◆ find_and_apply_schedule() [1/2]

void Halide::Internal::Autoscheduler::find_and_apply_schedule ( FunctionDAG & dag,
const std::vector< Function > & outputs,
const Adams2019Params & params,
CostModel * cost_model,
int beam_size,
StageMapOfScheduleFeatures * schedule_features )

◆ generate_tilings() [1/2]

std::vector< std::vector< int64_t > > Halide::Internal::Autoscheduler::generate_tilings ( const vector< int64_t > & s,
int d,
int factor,
bool allow_splits )

◆ deepest_common_ancestor()

const LoopNest * Halide::Internal::Autoscheduler::deepest_common_ancestor ( const std::map< const LoopNest *, std::pair< const LoopNest *, int > > & parents,
const LoopNest * a,
const LoopNest * b )

◆ compute_loop_nest_parents()

void Halide::Internal::Autoscheduler::compute_loop_nest_parents ( std::map< const LoopNest *, std::pair< const LoopNest *, int > > & parents,
const LoopNest * here,
int depth )

◆ find_and_apply_schedule() [2/2]

void Halide::Internal::Autoscheduler::find_and_apply_schedule ( FunctionDAG & dag,
const std::vector< Function > & outputs,
const Anderson2021Params & params,
const Target & target,
CostModel * cost_model,
int beam_size,
StageMapOfScheduleFeatures * schedule_features )

◆ sanitize_names()

void Halide::Internal::Autoscheduler::sanitize_names ( std::string & str)

◆ stringify()

std::string Halide::Internal::Autoscheduler::stringify ( GPU_parallelism label)

◆ may_subtile()

bool Halide::Internal::Autoscheduler::may_subtile ( const Anderson2021Params & params)

◆ get_shared_memory_limit()

int64_t Halide::Internal::Autoscheduler::get_shared_memory_limit ( const Anderson2021Params & params)

◆ get_shared_memory_sm_limit()

int64_t Halide::Internal::Autoscheduler::get_shared_memory_sm_limit ( const Anderson2021Params & params)

◆ get_active_block_hardware_limit()

int64_t Halide::Internal::Autoscheduler::get_active_block_hardware_limit ( const Anderson2021Params & params)

◆ get_active_warp_hardware_limit()

int64_t Halide::Internal::Autoscheduler::get_active_warp_hardware_limit ( const Anderson2021Params & params)

◆ get_register_mem_alloc_limit()

int64_t Halide::Internal::Autoscheduler::get_register_mem_alloc_limit ( )
constexpr

Definition at line 62 of file LoopNest.h.

◆ get_unroll_limit()

int Halide::Internal::Autoscheduler::get_unroll_limit ( const Target & target)

◆ in_range_zero_one()

bool Halide::Internal::Autoscheduler::in_range_zero_one ( double x)

◆ are_valid_thread_extents()

bool Halide::Internal::Autoscheduler::are_valid_thread_extents ( const vector< int64_t > & counts)

◆ get_idle_lane_wastage_limit_env_var()

double Halide::Internal::Autoscheduler::get_idle_lane_wastage_limit_env_var ( )

◆ get_idle_lane_wastage_limit()

double Halide::Internal::Autoscheduler::get_idle_lane_wastage_limit ( )

◆ all()

bool Halide::Internal::Autoscheduler::all ( const vector< int > & v)

◆ accessed_at_constant_indices()

bool Halide::Internal::Autoscheduler::accessed_at_constant_indices ( const std::vector< int > & unrolled,
const FunctionDAG::Edge * e )

◆ verify_memoized_features()

bool Halide::Internal::Autoscheduler::verify_memoized_features ( )

◆ is_memoize_blocks_enabled()

bool Halide::Internal::Autoscheduler::is_memoize_blocks_enabled ( )

◆ get_stack_memory_adjustment_factor()

double Halide::Internal::Autoscheduler::get_stack_memory_adjustment_factor ( )

◆ get_stack_memory_limit()

int64_t Halide::Internal::Autoscheduler::get_stack_memory_limit ( )

◆ use_adjusted_tilings()

bool Halide::Internal::Autoscheduler::use_adjusted_tilings ( )

◆ compute_root_and_inline_only()

bool Halide::Internal::Autoscheduler::compute_root_and_inline_only ( )

◆ deep_copy_loop_nest() [1/2]

template<typename PostCreateMutator>
void Halide::Internal::Autoscheduler::deep_copy_loop_nest ( LoopNest * new_loop_nest,
const LoopNest * new_loop_nest_parent,
const IntrusivePtr< const LoopNest > & existing_loop_nest,
const PostCreateMutator & post_create_mutator )

◆ deep_copy_loop_nest() [2/2]

template<typename PostCreateMutator>
LoopNest * Halide::Internal::Autoscheduler::deep_copy_loop_nest ( const IntrusivePtr< const LoopNest > & loop_nest,
const PostCreateMutator & post_create_mutator )

Definition at line 68 of file State.h.

References deep_copy_loop_nest().

◆ expect_eq()

template<typename A, typename B>
void Halide::Internal::Autoscheduler::expect_eq ( int line,
const A & expected,
const B & actual )

Definition at line 16 of file test.h.

References user_assert.

◆ approx_eq()

template<typename A, typename B>
void Halide::Internal::Autoscheduler::approx_eq ( int line,
const A & expected,
const B & actual,
float epsilon )

Definition at line 24 of file test.h.

References user_assert.

◆ expect()

template<typename A>
void Halide::Internal::Autoscheduler::expect ( int line,
const A & expected )

Definition at line 32 of file test.h.

References user_assert.

◆ all_ones()

bool Halide::Internal::Autoscheduler::all_ones ( const std::vector< int64_t > & nums)

◆ equal_to_existing_size()

bool Halide::Internal::Autoscheduler::equal_to_existing_size ( const std::vector< int64_t > & s,
const std::vector< int64_t > & nums )

◆ generate_serial_tilings()

std::vector< std::vector< int64_t > > Halide::Internal::Autoscheduler::generate_serial_tilings ( const std::vector< int64_t > & s,
int d,
int last_d,
int vectorized_index,
const std::vector< int > & vec_dim_serial_sizes,
bool filter_small_outer_extents = false,
bool allow_inner_ones = false )

◆ generate_tilings() [2/2]

std::vector< std::vector< int64_t > > Halide::Internal::Autoscheduler::generate_tilings ( const std::vector< int64_t > & s,
int d,
int factor,
bool allow_splits,
const std::vector< int > & inner_sizes = std::vector< int >() )

◆ lowered_dims()

void Halide::Internal::Autoscheduler::lowered_dims ( const std::vector< int64_t > & size,
int vector_loop_i,
std::vector< int64_t > & lowered_size )

moves vectorized dimension first and also removes dimensions with size 1 to reflect actual thread dimensions when loop nests are lowered

◆ generate_gpu_tilings()

std::vector< std::vector< int64_t > > Halide::Internal::Autoscheduler::generate_gpu_tilings ( const std::vector< std::vector< int64_t > > & stage_sizes,
const std::vector< std::vector< int > > & pure_dims,
const std::vector< int64_t > & max_s,
int d,
const std::vector< int > & vectorized_indices,
bool serial_inner,
bool is_compute_root_stage )

Variable Documentation

◆ kLocalMemoryLimit

int Halide::Internal::Autoscheduler::kLocalMemoryLimit = 524288
constexpr

Definition at line 32 of file State.h.