|
Halide 21.0.0
Halide compiler and libraries
|
Classes | |
| struct | Adams2019Params |
| struct | Anderson2021Params |
| struct | BoundContents |
| struct | Cache |
| struct | CachingOptions |
| class | ExprBranching |
| struct | Filter |
| struct | FunctionDAG |
| struct | GlobalAccessAccumulator |
| struct | GPULoopInfo |
| class | LoadJacobian |
| struct | LocalAccessAccumulator |
| struct | LoopNest |
| class | LoopNestParser |
| class | map |
| STL class. More... | |
| struct | MemInfo |
| struct | MemTraits |
| struct | MemTraits< GlobalMem > |
| struct | MemTraits< LocalMem > |
| struct | MemTraits< SharedMem > |
| struct | NoOpMutator |
| struct | OptionalRational |
| class | pair |
| STL class. More... | |
| class | ParamParser |
| struct | ScopedStatistic |
| struct | ScopedTimer |
| struct | SearchSpace |
| struct | SearchSpaceOptions |
| class | set |
| STL class. More... | |
| struct | SharedAccessAccumulator |
| class | Span |
| struct | State |
| class | StateQueue |
| struct | Statistics |
| struct | Strides |
| class | string |
| STL class. More... | |
| struct | ThreadInfo |
| struct | ThreadTileOption |
| struct | Timer |
| class | unique_ptr |
| STL class. More... | |
| class | unordered_set |
| STL class. More... | |
| class | vector |
| STL class. More... | |
Typedefs | |
| typedef PerfectHashMap< FunctionDAG::Node::Stage, ScheduleFeatures > | StageMapOfScheduleFeatures |
| using | BlockCache = NodeMap<std::map<int, std::vector<IntrusivePtr<const LoopNest>>>> |
| using | Bound = IntrusivePtr<const BoundContents> |
| template<typename T> | |
| using | NodeMap = PerfectHashMap<FunctionDAG::Node, T> |
| template<typename T> | |
| using | StageMap = PerfectHashMap<FunctionDAG::Node::Stage, T> |
| using | Clock = std::chrono::high_resolution_clock |
| template<typename T> | |
| using | Accumulator = typename MemTraits<T>::Accumulator |
| template<typename T> | |
| using | MemInfoType = MemInfo<typename MemTraits<T>::MemInfoType> |
| using | GlobalMemInfo = MemInfoType<GlobalMem> |
| using | SharedMemInfo = MemInfoType<SharedMem> |
| using | LocalMemInfo = MemInfoType<LocalMem> |
| using | LoopNestMap = map<const LoopNest *, pair<const LoopNest *, int>> |
Enumerations | |
| enum class | GPU_parallelism { Block , Thread , Serial , Simd , Parallelized , None } |
| enum class | GPUMemoryType { Global , Shared , Local , Registers , Inlined } |
Functions | |
| void | find_and_apply_schedule (FunctionDAG &dag, const std::vector< Function > &outputs, const Adams2019Params &params, CostModel *cost_model, int beam_size, StageMapOfScheduleFeatures *schedule_features) |
| std::vector< std::vector< int64_t > > | generate_tilings (const vector< int64_t > &s, int d, int factor, bool allow_splits) |
| const LoopNest * | deepest_common_ancestor (const std::map< const LoopNest *, std::pair< const LoopNest *, int > > &parents, const LoopNest *a, const LoopNest *b) |
| void | compute_loop_nest_parents (std::map< const LoopNest *, std::pair< const LoopNest *, int > > &parents, const LoopNest *here, int depth) |
| void | find_and_apply_schedule (FunctionDAG &dag, const std::vector< Function > &outputs, const Anderson2021Params &params, const Target &target, CostModel *cost_model, int beam_size, StageMapOfScheduleFeatures *schedule_features) |
| void | sanitize_names (std::string &str) |
| std::string | stringify (GPU_parallelism label) |
| bool | may_subtile (const Anderson2021Params &params) |
| int64_t | get_shared_memory_limit (const Anderson2021Params &params) |
| int64_t | get_shared_memory_sm_limit (const Anderson2021Params &params) |
| int64_t | get_active_block_hardware_limit (const Anderson2021Params &params) |
| int64_t | get_active_warp_hardware_limit (const Anderson2021Params &params) |
| constexpr int64_t | get_register_mem_alloc_limit () |
| int | get_unroll_limit (const Target &target) |
| bool | in_range_zero_one (double x) |
| bool | are_valid_thread_extents (const vector< int64_t > &counts) |
| double | get_idle_lane_wastage_limit_env_var () |
| double | get_idle_lane_wastage_limit () |
| bool | all (const vector< int > &v) |
| bool | accessed_at_constant_indices (const std::vector< int > &unrolled, const FunctionDAG::Edge *e) |
| bool | verify_memoized_features () |
| bool | is_memoize_blocks_enabled () |
| double | get_stack_memory_adjustment_factor () |
| int64_t | get_stack_memory_limit () |
| bool | use_adjusted_tilings () |
| bool | compute_root_and_inline_only () |
| template<typename PostCreateMutator> | |
| void | deep_copy_loop_nest (LoopNest *new_loop_nest, const LoopNest *new_loop_nest_parent, const IntrusivePtr< const LoopNest > &existing_loop_nest, const PostCreateMutator &post_create_mutator) |
| template<typename PostCreateMutator> | |
| LoopNest * | deep_copy_loop_nest (const IntrusivePtr< const LoopNest > &loop_nest, const PostCreateMutator &post_create_mutator) |
| template<typename A, typename B> | |
| void | expect_eq (int line, const A &expected, const B &actual) |
| template<typename A, typename B> | |
| void | approx_eq (int line, const A &expected, const B &actual, float epsilon) |
| template<typename A> | |
| void | expect (int line, const A &expected) |
| bool | all_ones (const std::vector< int64_t > &nums) |
| bool | equal_to_existing_size (const std::vector< int64_t > &s, const std::vector< int64_t > &nums) |
| std::vector< std::vector< int64_t > > | generate_serial_tilings (const std::vector< int64_t > &s, int d, int last_d, int vectorized_index, const std::vector< int > &vec_dim_serial_sizes, bool filter_small_outer_extents=false, bool allow_inner_ones=false) |
| std::vector< std::vector< int64_t > > | generate_tilings (const std::vector< int64_t > &s, int d, int factor, bool allow_splits, const std::vector< int > &inner_sizes=std::vector< int >()) |
| void | lowered_dims (const std::vector< int64_t > &size, int vector_loop_i, std::vector< int64_t > &lowered_size) |
| Moves the vectorized dimension first and also removes dimensions with size 1, to reflect the actual thread dimensions when loop nests are lowered. | |
| std::vector< std::vector< int64_t > > | generate_gpu_tilings (const std::vector< std::vector< int64_t > > &stage_sizes, const std::vector< std::vector< int > > &pure_dims, const std::vector< int64_t > &max_s, int d, const std::vector< int > &vectorized_indices, bool serial_inner, bool is_compute_root_stage) |
Variables | |
| constexpr int | kLocalMemoryLimit = 524288 |
| typedef PerfectHashMap< FunctionDAG::Node::Stage, ScheduleFeatures > Halide::Internal::Autoscheduler::StageMapOfScheduleFeatures |
Definition at line 12 of file AutoSchedule.h.
| using Halide::Internal::Autoscheduler::BlockCache = NodeMap<std::map<int, std::vector<IntrusivePtr<const LoopNest>>>> |
| using Halide::Internal::Autoscheduler::Bound = IntrusivePtr<const BoundContents> |
Definition at line 362 of file FunctionDAG.h.
| using Halide::Internal::Autoscheduler::NodeMap = PerfectHashMap<FunctionDAG::Node, T> |
Definition at line 21 of file LoopNest.h.
| using Halide::Internal::Autoscheduler::StageMap = PerfectHashMap<FunctionDAG::Node::Stage, T> |
Definition at line 24 of file LoopNest.h.
| using Halide::Internal::Autoscheduler::Clock = std::chrono::high_resolution_clock |
| using Halide::Internal::Autoscheduler::Accumulator = typename MemTraits<T>::Accumulator |
Definition at line 52 of file GPUMemInfo.h.
| using Halide::Internal::Autoscheduler::MemInfoType = MemInfo<typename MemTraits<T>::MemInfoType> |
Definition at line 108 of file GPUMemInfo.h.
| using Halide::Internal::Autoscheduler::GlobalMemInfo = MemInfoType<GlobalMem> |
Definition at line 110 of file GPUMemInfo.h.
| using Halide::Internal::Autoscheduler::SharedMemInfo = MemInfoType<SharedMem> |
Definition at line 111 of file GPUMemInfo.h.
| using Halide::Internal::Autoscheduler::LocalMemInfo = MemInfoType<LocalMem> |
Definition at line 112 of file GPUMemInfo.h.
| using Halide::Internal::Autoscheduler::LoopNestMap = map<const LoopNest *, pair<const LoopNest *, int>> |
| enum class Halide::Internal::Autoscheduler::GPU_parallelism |
strong |
| Enumerator | |
|---|---|
| Block | |
| Thread | |
| Serial | |
| Simd | |
| Parallelized | |
| None | |
Definition at line 32 of file LoopNest.h.
| enum class Halide::Internal::Autoscheduler::GPUMemoryType |
strong |
| Enumerator | |
|---|---|
| Global | |
| Shared | |
| Local | |
| Registers | |
| Inlined | |
Definition at line 44 of file LoopNest.h.
| void Halide::Internal::Autoscheduler::find_and_apply_schedule | ( | FunctionDAG & | dag, |
| const std::vector< Function > & | outputs, | ||
| const Adams2019Params & | params, | ||
| CostModel * | cost_model, | ||
| int | beam_size, | ||
| StageMapOfScheduleFeatures * | schedule_features ) |
| std::vector< std::vector< int64_t > > Halide::Internal::Autoscheduler::generate_tilings | ( | const vector< int64_t > & | s, |
| int | d, | ||
| int | factor, | ||
| bool | allow_splits ) |
| const LoopNest * Halide::Internal::Autoscheduler::deepest_common_ancestor | ( | const std::map< const LoopNest *, std::pair< const LoopNest *, int > > & | parents, |
| const LoopNest * | a, | ||
| const LoopNest * | b ) |
| void Halide::Internal::Autoscheduler::compute_loop_nest_parents | ( | std::map< const LoopNest *, std::pair< const LoopNest *, int > > & | parents, |
| const LoopNest * | here, | ||
| int | depth ) |
| void Halide::Internal::Autoscheduler::find_and_apply_schedule | ( | FunctionDAG & | dag, |
| const std::vector< Function > & | outputs, | ||
| const Anderson2021Params & | params, | ||
| const Target & | target, | ||
| CostModel * | cost_model, | ||
| int | beam_size, | ||
| StageMapOfScheduleFeatures * | schedule_features ) |
| void Halide::Internal::Autoscheduler::sanitize_names | ( | std::string & | str | ) |
| std::string Halide::Internal::Autoscheduler::stringify | ( | GPU_parallelism | label | ) |
| bool Halide::Internal::Autoscheduler::may_subtile | ( | const Anderson2021Params & | params | ) |
| int64_t Halide::Internal::Autoscheduler::get_shared_memory_limit | ( | const Anderson2021Params & | params | ) |
| int64_t Halide::Internal::Autoscheduler::get_shared_memory_sm_limit | ( | const Anderson2021Params & | params | ) |
| int64_t Halide::Internal::Autoscheduler::get_active_block_hardware_limit | ( | const Anderson2021Params & | params | ) |
| int64_t Halide::Internal::Autoscheduler::get_active_warp_hardware_limit | ( | const Anderson2021Params & | params | ) |
| constexpr int64_t Halide::Internal::Autoscheduler::get_register_mem_alloc_limit | ( | ) |
constexpr |
Definition at line 62 of file LoopNest.h.
| int Halide::Internal::Autoscheduler::get_unroll_limit | ( | const Target & | target | ) |
| bool Halide::Internal::Autoscheduler::in_range_zero_one | ( | double | x | ) |
| bool Halide::Internal::Autoscheduler::are_valid_thread_extents | ( | const vector< int64_t > & | counts | ) |
| double Halide::Internal::Autoscheduler::get_idle_lane_wastage_limit_env_var | ( | ) |
| double Halide::Internal::Autoscheduler::get_idle_lane_wastage_limit | ( | ) |
| bool Halide::Internal::Autoscheduler::all | ( | const vector< int > & | v | ) |
| bool Halide::Internal::Autoscheduler::accessed_at_constant_indices | ( | const std::vector< int > & | unrolled, |
| const FunctionDAG::Edge * | e ) |
References Halide::Internal::ref_count().
| bool Halide::Internal::Autoscheduler::verify_memoized_features | ( | ) |
| bool Halide::Internal::Autoscheduler::is_memoize_blocks_enabled | ( | ) |
| double Halide::Internal::Autoscheduler::get_stack_memory_adjustment_factor | ( | ) |
| int64_t Halide::Internal::Autoscheduler::get_stack_memory_limit | ( | ) |
| bool Halide::Internal::Autoscheduler::use_adjusted_tilings | ( | ) |
| bool Halide::Internal::Autoscheduler::compute_root_and_inline_only | ( | ) |
| void Halide::Internal::Autoscheduler::deep_copy_loop_nest | ( | LoopNest * | new_loop_nest, |
| const LoopNest * | new_loop_nest_parent, | ||
| const IntrusivePtr< const LoopNest > & | existing_loop_nest, | ||
| const PostCreateMutator & | post_create_mutator ) |
Definition at line 50 of file State.h.
References Halide::Internal::Autoscheduler::LoopNest::children, Halide::Internal::Autoscheduler::LoopNest::copy_from(), and deep_copy_loop_nest().
Referenced by Halide::Internal::Autoscheduler::State::create_feature_root(), deep_copy_loop_nest(), and deep_copy_loop_nest().
| LoopNest * Halide::Internal::Autoscheduler::deep_copy_loop_nest | ( | const IntrusivePtr< const LoopNest > & | loop_nest, |
| const PostCreateMutator & | post_create_mutator ) |
Definition at line 68 of file State.h.
References deep_copy_loop_nest().
| void Halide::Internal::Autoscheduler::expect_eq | ( | int | line, |
| const A & | expected, | ||
| const B & | actual ) |
Definition at line 16 of file test.h.
References user_assert.
| void Halide::Internal::Autoscheduler::approx_eq | ( | int | line, |
| const A & | expected, | ||
| const B & | actual, | ||
| float | epsilon ) |
Definition at line 24 of file test.h.
References user_assert.
| void Halide::Internal::Autoscheduler::expect | ( | int | line, |
| const A & | expected ) |
Definition at line 32 of file test.h.
References user_assert.
| bool Halide::Internal::Autoscheduler::all_ones | ( | const std::vector< int64_t > & | nums | ) |
| bool Halide::Internal::Autoscheduler::equal_to_existing_size | ( | const std::vector< int64_t > & | s, |
| const std::vector< int64_t > & | nums ) |
| std::vector< std::vector< int64_t > > Halide::Internal::Autoscheduler::generate_serial_tilings | ( | const std::vector< int64_t > & | s, |
| int | d, | ||
| int | last_d, | ||
| int | vectorized_index, | ||
| const std::vector< int > & | vec_dim_serial_sizes, | ||
| bool | filter_small_outer_extents = false, | ||
| bool | allow_inner_ones = false ) |
| std::vector< std::vector< int64_t > > Halide::Internal::Autoscheduler::generate_tilings | ( | const std::vector< int64_t > & | s, |
| int | d, | ||
| int | factor, | ||
| bool | allow_splits, | ||
| const std::vector< int > & | inner_sizes = std::vector< int >() ) |
| void Halide::Internal::Autoscheduler::lowered_dims | ( | const std::vector< int64_t > & | size, |
| int | vector_loop_i, | ||
| std::vector< int64_t > & | lowered_size ) |
Moves the vectorized dimension first and also removes dimensions with size 1, to reflect the actual thread dimensions when loop nests are lowered.
| std::vector< std::vector< int64_t > > Halide::Internal::Autoscheduler::generate_gpu_tilings | ( | const std::vector< std::vector< int64_t > > & | stage_sizes, |
| const std::vector< std::vector< int > > & | pure_dims, | ||
| const std::vector< int64_t > & | max_s, | ||
| int | d, | ||
| const std::vector< int > & | vectorized_indices, | ||
| bool | serial_inner, | ||
| bool | is_compute_root_stage ) |