Halide 19.0.0
Halide compiler and libraries
Halide::Internal::Autoscheduler Namespace Reference
Classes

struct  Adams2019Params
struct  Anderson2021Params
struct  BoundContents
struct  Cache
struct  CachingOptions
class   ExprBranching
struct  Filter
struct  FunctionDAG
struct  GlobalAccessAccumulator
struct  GPULoopInfo
class   LoadJacobian
struct  LocalAccessAccumulator
struct  LoopNest
class   LoopNestParser
struct  MemInfo
struct  MemTraits
struct  MemTraits<GlobalMem>
struct  MemTraits<LocalMem>
struct  MemTraits<SharedMem>
struct  NoOpMutator
struct  OptionalRational
class   ParamParser
struct  ScopedStatistic
struct  ScopedTimer
struct  SearchSpace
struct  SearchSpaceOptions
struct  SharedAccessAccumulator
class   Span
struct  State
class   StateQueue
struct  Statistics
struct  Strides
struct  ThreadInfo
struct  ThreadTileOption
struct  Timer
Typedefs

typedef PerfectHashMap<FunctionDAG::Node::Stage, ScheduleFeatures> StageMapOfScheduleFeatures
using BlockCache = NodeMap<std::map<int, std::vector<IntrusivePtr<const LoopNest>>>>
using Bound = IntrusivePtr<const BoundContents>
template<typename T> using NodeMap = PerfectHashMap<FunctionDAG::Node, T>
template<typename T> using StageMap = PerfectHashMap<FunctionDAG::Node::Stage, T>
using Clock = std::chrono::high_resolution_clock
template<typename T> using Accumulator = typename MemTraits<T>::Accumulator
template<typename T> using MemInfoType = MemInfo<typename MemTraits<T>::MemInfoType>
using GlobalMemInfo = MemInfoType<GlobalMem>
using SharedMemInfo = MemInfoType<SharedMem>
using LocalMemInfo = MemInfoType<LocalMem>
using LoopNestMap = map<const LoopNest *, pair<const LoopNest *, int>>
Enumerations

enum class GPU_parallelism { Block, Thread, Serial, Simd, Parallelized, None }
enum class GPUMemoryType { Global, Shared, Local, Registers, Inlined }
Functions

void find_and_apply_schedule(FunctionDAG &dag, const std::vector<Function> &outputs, const Adams2019Params &params, CostModel *cost_model, int beam_size, StageMapOfScheduleFeatures *schedule_features)
std::vector<std::vector<int64_t>> generate_tilings(const vector<int64_t> &s, int d, int factor, bool allow_splits)
const LoopNest *deepest_common_ancestor(const std::map<const LoopNest *, std::pair<const LoopNest *, int>> &parents, const LoopNest *a, const LoopNest *b)
void compute_loop_nest_parents(std::map<const LoopNest *, std::pair<const LoopNest *, int>> &parents, const LoopNest *here, int depth)
void find_and_apply_schedule(FunctionDAG &dag, const std::vector<Function> &outputs, const Anderson2021Params &params, const Target &target, CostModel *cost_model, int beam_size, StageMapOfScheduleFeatures *schedule_features)
void sanitize_names(std::string &str)
std::string stringify(GPU_parallelism label)
bool may_subtile(const Anderson2021Params &params)
int64_t get_shared_memory_limit(const Anderson2021Params &params)
int64_t get_shared_memory_sm_limit(const Anderson2021Params &params)
int64_t get_active_block_hardware_limit(const Anderson2021Params &params)
int64_t get_active_warp_hardware_limit(const Anderson2021Params &params)
constexpr int64_t get_register_mem_alloc_limit()
int get_unroll_limit(const Target &target)
bool in_range_zero_one(double x)
bool are_valid_thread_extents(const vector<int64_t> &counts)
double get_idle_lane_wastage_limit_env_var()
double get_idle_lane_wastage_limit()
bool all(const vector<int> &v)
bool accessed_at_constant_indices(const std::vector<int> &unrolled, const FunctionDAG::Edge *e)
bool verify_memoized_features()
bool is_memoize_blocks_enabled()
double get_stack_memory_adjustment_factor()
int64_t get_stack_memory_limit()
bool use_adjusted_tilings()
bool compute_root_and_inline_only()
template<typename PostCreateMutator> void deep_copy_loop_nest(LoopNest *new_loop_nest, const LoopNest *new_loop_nest_parent, const IntrusivePtr<const LoopNest> &existing_loop_nest, const PostCreateMutator &post_create_mutator)
template<typename PostCreateMutator> LoopNest *deep_copy_loop_nest(const IntrusivePtr<const LoopNest> &loop_nest, const PostCreateMutator &post_create_mutator)
template<typename A, typename B> void expect_eq(int line, const A &expected, const B &actual)
template<typename A, typename B> void approx_eq(int line, const A &expected, const B &actual, float epsilon)
template<typename A> void expect(int line, const A &expected)
bool all_ones(const std::vector<int64_t> &nums)
bool equal_to_existing_size(const std::vector<int64_t> &s, const std::vector<int64_t> &nums)
std::vector<std::vector<int64_t>> generate_serial_tilings(const std::vector<int64_t> &s, int d, int last_d, int vectorized_index, const std::vector<int> &vec_dim_serial_sizes, bool filter_small_outer_extents = false, bool allow_inner_ones = false)
std::vector<std::vector<int64_t>> generate_tilings(const std::vector<int64_t> &s, int d, int factor, bool allow_splits, const std::vector<int> &inner_sizes = std::vector<int>())
void lowered_dims(const std::vector<int64_t> &size, int vector_loop_i, std::vector<int64_t> &lowered_size)
    Moves the vectorized dimension first and removes dimensions of size 1, to reflect the actual thread dimensions once loop nests are lowered.
std::vector<std::vector<int64_t>> generate_gpu_tilings(const std::vector<std::vector<int64_t>> &stage_sizes, const std::vector<std::vector<int>> &pure_dims, const std::vector<int64_t> &max_s, int d, const std::vector<int> &vectorized_indices, bool serial_inner, bool is_compute_root_stage)

Variables

constexpr int kLocalMemoryLimit = 524288
Typedef Documentation

typedef PerfectHashMap<FunctionDAG::Node::Stage, ScheduleFeatures> Halide::Internal::Autoscheduler::StageMapOfScheduleFeatures
Definition at line 12 of file AutoSchedule.h.

using Halide::Internal::Autoscheduler::BlockCache = NodeMap<std::map<int, std::vector<IntrusivePtr<const LoopNest>>>>

using Halide::Internal::Autoscheduler::Bound = IntrusivePtr<const BoundContents>
Definition at line 363 of file FunctionDAG.h.

template<typename T> using Halide::Internal::Autoscheduler::NodeMap = PerfectHashMap<FunctionDAG::Node, T>
Definition at line 21 of file LoopNest.h.

template<typename T> using Halide::Internal::Autoscheduler::StageMap = PerfectHashMap<FunctionDAG::Node::Stage, T>
Definition at line 24 of file LoopNest.h.

using Halide::Internal::Autoscheduler::Clock = std::chrono::high_resolution_clock

template<typename T> using Halide::Internal::Autoscheduler::Accumulator = typename MemTraits<T>::Accumulator
Definition at line 53 of file GPUMemInfo.h.

template<typename T> using Halide::Internal::Autoscheduler::MemInfoType = MemInfo<typename MemTraits<T>::MemInfoType>
Definition at line 109 of file GPUMemInfo.h.

using Halide::Internal::Autoscheduler::GlobalMemInfo = MemInfoType<GlobalMem>
Definition at line 111 of file GPUMemInfo.h.

using Halide::Internal::Autoscheduler::SharedMemInfo = MemInfoType<SharedMem>
Definition at line 112 of file GPUMemInfo.h.

using Halide::Internal::Autoscheduler::LocalMemInfo = MemInfoType<LocalMem>
Definition at line 113 of file GPUMemInfo.h.

using Halide::Internal::Autoscheduler::LoopNestMap = map<const LoopNest *, pair<const LoopNest *, int>>
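NodeMap and StageMap are the aliases the autoscheduler uses to attach side data to the FunctionDAG through PerfectHashMap. A minimal sketch of how such side tables are declared (the variable names are illustrative only; element access goes through PerfectHashMap, whose interface is not restated on this page):

    // Sketch only: per-node and per-stage side tables keyed by the DAG structure.
    NodeMap<int64_t> footprint_per_node;            // illustrative: one value per FunctionDAG::Node
    StageMap<ScheduleFeatures> features_per_stage;  // one ScheduleFeatures per FunctionDAG::Node::Stage
    BlockCache memoized_blocks;                     // NodeMap of tile size -> memoized LoopNests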
Enumeration Type Documentation

enum class Halide::Internal::Autoscheduler::GPU_parallelism
Enumerators: Block, Thread, Serial, Simd, Parallelized, None
Definition at line 32 of file LoopNest.h.

enum class Halide::Internal::Autoscheduler::GPUMemoryType
Enumerators: Global, Shared, Local, Registers, Inlined
Definition at line 44 of file LoopNest.h.
Function Documentation

void Halide::Internal::Autoscheduler::find_and_apply_schedule(FunctionDAG &dag, const std::vector<Function> &outputs, const Adams2019Params &params, CostModel *cost_model, int beam_size, StageMapOfScheduleFeatures *schedule_features)

std::vector<std::vector<int64_t>> Halide::Internal::Autoscheduler::generate_tilings(const vector<int64_t> &s, int d, int factor, bool allow_splits)

const LoopNest *Halide::Internal::Autoscheduler::deepest_common_ancestor(const std::map<const LoopNest *, std::pair<const LoopNest *, int>> &parents, const LoopNest *a, const LoopNest *b)

void Halide::Internal::Autoscheduler::compute_loop_nest_parents(std::map<const LoopNest *, std::pair<const LoopNest *, int>> &parents, const LoopNest *here, int depth)
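The two helpers above are typically used together: compute_loop_nest_parents walks a loop-nest tree once, recording each node's parent and depth, and deepest_common_ancestor then answers ancestor queries against that map. A minimal sketch, assuming root, a, and b are const LoopNest pointers into an already-built loop-nest tree:

    // Sketch only: build the parent/depth map once, then query it.
    std::map<const LoopNest *, std::pair<const LoopNest *, int>> parents;
    compute_loop_nest_parents(parents, root, /*depth=*/0);

    // Deepest loop nest that contains both `a` and `b`.
    const LoopNest *ancestor = deepest_common_ancestor(parents, a, b);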
void Halide::Internal::Autoscheduler::find_and_apply_schedule(FunctionDAG &dag, const std::vector<Function> &outputs, const Anderson2021Params &params, const Target &target, CostModel *cost_model, int beam_size, StageMapOfScheduleFeatures *schedule_features)

void Halide::Internal::Autoscheduler::sanitize_names(std::string &str)

std::string Halide::Internal::Autoscheduler::stringify(GPU_parallelism label)

bool Halide::Internal::Autoscheduler::may_subtile(const Anderson2021Params &params)

int64_t Halide::Internal::Autoscheduler::get_shared_memory_limit(const Anderson2021Params &params)

int64_t Halide::Internal::Autoscheduler::get_shared_memory_sm_limit(const Anderson2021Params &params)

int64_t Halide::Internal::Autoscheduler::get_active_block_hardware_limit(const Anderson2021Params &params)

int64_t Halide::Internal::Autoscheduler::get_active_warp_hardware_limit(const Anderson2021Params &params)
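The limit helpers above expose the GPU budgets that the Anderson2021 search consults while pruning loop nests. A minimal sketch of querying them, assuming for illustration that Anderson2021Params can be default-constructed and without asserting the units of the returned values:

    // Sketch only: read the sizing limits the search works with.
    Anderson2021Params params;  // assumed default-constructible for this sketch
    const int64_t shared_mem_limit    = get_shared_memory_limit(params);
    const int64_t shared_mem_sm_limit = get_shared_memory_sm_limit(params);
    const int64_t active_block_limit  = get_active_block_hardware_limit(params);
    const int64_t active_warp_limit   = get_active_warp_hardware_limit(params);
    const bool subtiling_allowed      = may_subtile(params);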
constexpr int64_t Halide::Internal::Autoscheduler::get_register_mem_alloc_limit()
Definition at line 62 of file LoopNest.h.
int Halide::Internal::Autoscheduler::get_unroll_limit(const Target &target)

bool Halide::Internal::Autoscheduler::in_range_zero_one(double x)

bool Halide::Internal::Autoscheduler::are_valid_thread_extents(const vector<int64_t> &counts)

double Halide::Internal::Autoscheduler::get_idle_lane_wastage_limit_env_var()

double Halide::Internal::Autoscheduler::get_idle_lane_wastage_limit()

bool Halide::Internal::Autoscheduler::all(const vector<int> &v)

bool Halide::Internal::Autoscheduler::accessed_at_constant_indices(const std::vector<int> &unrolled, const FunctionDAG::Edge *e)

bool Halide::Internal::Autoscheduler::verify_memoized_features()

bool Halide::Internal::Autoscheduler::is_memoize_blocks_enabled()

double Halide::Internal::Autoscheduler::get_stack_memory_adjustment_factor()

int64_t Halide::Internal::Autoscheduler::get_stack_memory_limit()

bool Halide::Internal::Autoscheduler::use_adjusted_tilings()

bool Halide::Internal::Autoscheduler::compute_root_and_inline_only()
template<typename PostCreateMutator>
void Halide::Internal::Autoscheduler::deep_copy_loop_nest(LoopNest *new_loop_nest, const LoopNest *new_loop_nest_parent, const IntrusivePtr<const LoopNest> &existing_loop_nest, const PostCreateMutator &post_create_mutator)
Definition at line 50 of file State.h.
References Halide::Internal::Autoscheduler::LoopNest::children, Halide::Internal::Autoscheduler::LoopNest::copy_from(), and deep_copy_loop_nest().
Referenced by Halide::Internal::Autoscheduler::State::create_feature_root(), and deep_copy_loop_nest().

template<typename PostCreateMutator>
LoopNest *Halide::Internal::Autoscheduler::deep_copy_loop_nest(const IntrusivePtr<const LoopNest> &loop_nest, const PostCreateMutator &post_create_mutator)
Definition at line 68 of file State.h.
References deep_copy_loop_nest().
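A minimal usage sketch for the single-argument overload, assuming the NoOpMutator listed in the Classes section is a "do nothing after copying" post-create mutator whose call operator accepts a LoopNest pointer, and that src is an IntrusivePtr<const LoopNest> to an existing loop nest:

    // Sketch only: deep-copy an existing loop nest, invoking the post-create
    // mutator on each freshly created node (NoOpMutator leaves nodes untouched).
    NoOpMutator mutator;
    LoopNest *copy = deep_copy_loop_nest(src, mutator);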
template<typename A, typename B>
void Halide::Internal::Autoscheduler::expect_eq(int line, const A &expected, const B &actual)
Definition at line 16 of file test.h.
References user_assert.

template<typename A, typename B>
void Halide::Internal::Autoscheduler::approx_eq(int line, const A &expected, const B &actual, float epsilon)
Definition at line 24 of file test.h.
References user_assert.

template<typename A>
void Halide::Internal::Autoscheduler::expect(int line, const A &expected)
Definition at line 32 of file test.h.
References user_assert.
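These three helpers are small test assertions built on user_assert; each takes the caller's source line so a failure can report where it was triggered. A sketch of typical call sites, written as an illustration rather than taken from a specific Halide test, and assuming expect() asserts that its argument is truthy:

    // Sketch only: __LINE__ is passed so a failing assertion reports the caller's line.
    std::vector<int64_t> v = {1, 2, 3};
    expect_eq(__LINE__, 4, 2 + 2);                 // exact equality
    approx_eq(__LINE__, 1.0f, 1.0000001f, 1e-5f);  // equality within epsilon
    expect(__LINE__, !v.empty());                  // boolean condition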
bool Halide::Internal::Autoscheduler::all_ones(const std::vector<int64_t> &nums)

bool Halide::Internal::Autoscheduler::equal_to_existing_size(const std::vector<int64_t> &s, const std::vector<int64_t> &nums)

std::vector<std::vector<int64_t>> Halide::Internal::Autoscheduler::generate_serial_tilings(const std::vector<int64_t> &s, int d, int last_d, int vectorized_index, const std::vector<int> &vec_dim_serial_sizes, bool filter_small_outer_extents = false, bool allow_inner_ones = false)

std::vector<std::vector<int64_t>> Halide::Internal::Autoscheduler::generate_tilings(const std::vector<int64_t> &s, int d, int factor, bool allow_splits, const std::vector<int> &inner_sizes = std::vector<int>())
void Halide::Internal::Autoscheduler::lowered_dims(const std::vector<int64_t> &size, int vector_loop_i, std::vector<int64_t> &lowered_size)

Moves the vectorized dimension first and removes dimensions of size 1, to reflect the actual thread dimensions once loop nests are lowered.
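A worked example based on the description above; the relative ordering of the remaining dimensions is an assumption drawn from that description, not verified against the implementation:

    // Sketch only: size = {4, 1, 8, 16}, with the vectorized loop at index 3.
    // Per the description, the size-1 dimension is dropped and the vectorized
    // extent moves to the front, so lowered_size is expected to be {16, 4, 8}.
    std::vector<int64_t> size = {4, 1, 8, 16};
    std::vector<int64_t> lowered_size;
    lowered_dims(size, /*vector_loop_i=*/3, lowered_size);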
std::vector<std::vector<int64_t>> Halide::Internal::Autoscheduler::generate_gpu_tilings(const std::vector<std::vector<int64_t>> &stage_sizes, const std::vector<std::vector<int>> &pure_dims, const std::vector<int64_t> &max_s, int d, const std::vector<int> &vectorized_indices, bool serial_inner, bool is_compute_root_stage)