Halide 19.0.0
Halide compiler and libraries
|
A single definition of a Func. More...
#include <Func.h>
Public Member Functions | |
Stage (Internal::Function f, Internal::Definition d, size_t stage_index) | |
const Internal::StageSchedule & | get_schedule () const |
Return the current StageSchedule associated with this Stage. | |
std::string | dump_argument_list () const |
Return a string describing the current var list taking into account all the splits, reorders, and tiles. | |
std::string | name () const |
Return the name of this stage, e.g. "f.update(2)". | |
Func | rfactor (std::vector< std::pair< RVar, Var > > preserved) |
Calling rfactor() on an associative update definition of a Func will split the update into an intermediate which computes the partial results and replaces the current update definition with a new definition which merges the partial results. | |
Func | rfactor (const RVar &r, const Var &v) |
Stage & | compute_with (LoopLevel loop_level, const std::vector< std::pair< VarOrRVar, LoopAlignStrategy > > &align) |
Schedule the iteration over this stage to be fused with another stage 's' from the outermost loop to a given LoopLevel. | |
Stage & | compute_with (LoopLevel loop_level, LoopAlignStrategy align=LoopAlignStrategy::Auto) |
Stage & | compute_with (const Stage &s, const VarOrRVar &var, const std::vector< std::pair< VarOrRVar, LoopAlignStrategy > > &align) |
Stage & | compute_with (const Stage &s, const VarOrRVar &var, LoopAlignStrategy align=LoopAlignStrategy::Auto) |
Stage & | split (const VarOrRVar &old, const VarOrRVar &outer, const VarOrRVar &inner, const Expr &factor, TailStrategy tail=TailStrategy::Auto) |
Scheduling calls that control how the domain of this stage is traversed. | |
Stage & | fuse (const VarOrRVar &inner, const VarOrRVar &outer, const VarOrRVar &fused) |
Stage & | serial (const VarOrRVar &var) |
Stage & | parallel (const VarOrRVar &var) |
Stage & | vectorize (const VarOrRVar &var) |
Stage & | unroll (const VarOrRVar &var) |
Stage & | parallel (const VarOrRVar &var, const Expr &task_size, TailStrategy tail=TailStrategy::Auto) |
Stage & | vectorize (const VarOrRVar &var, const Expr &factor, TailStrategy tail=TailStrategy::Auto) |
Stage & | unroll (const VarOrRVar &var, const Expr &factor, TailStrategy tail=TailStrategy::Auto) |
Stage & | partition (const VarOrRVar &var, Partition partition_policy) |
Stage & | never_partition_all () |
Stage & | never_partition (const std::vector< VarOrRVar > &vars) |
Stage & | always_partition_all () |
Stage & | always_partition (const std::vector< VarOrRVar > &vars) |
Stage & | tile (const VarOrRVar &x, const VarOrRVar &y, const VarOrRVar &xo, const VarOrRVar &yo, const VarOrRVar &xi, const VarOrRVar &yi, const Expr &xfactor, const Expr &yfactor, TailStrategy tail=TailStrategy::Auto) |
Stage & | tile (const VarOrRVar &x, const VarOrRVar &y, const VarOrRVar &xi, const VarOrRVar &yi, const Expr &xfactor, const Expr &yfactor, TailStrategy tail=TailStrategy::Auto) |
Stage & | tile (const std::vector< VarOrRVar > &previous, const std::vector< VarOrRVar > &outers, const std::vector< VarOrRVar > &inners, const std::vector< Expr > &factors, const std::vector< TailStrategy > &tails) |
Stage & | tile (const std::vector< VarOrRVar > &previous, const std::vector< VarOrRVar > &outers, const std::vector< VarOrRVar > &inners, const std::vector< Expr > &factors, TailStrategy tail=TailStrategy::Auto) |
Stage & | tile (const std::vector< VarOrRVar > &previous, const std::vector< VarOrRVar > &inners, const std::vector< Expr > &factors, TailStrategy tail=TailStrategy::Auto) |
Stage & | reorder (const std::vector< VarOrRVar > &vars) |
template<typename... Args> | |
HALIDE_NO_USER_CODE_INLINE std::enable_if< Internal::all_are_convertible< VarOrRVar, Args... >::value, Stage & >::type | reorder (const VarOrRVar &x, const VarOrRVar &y, Args &&...args) |
template<typename... Args> | |
HALIDE_NO_USER_CODE_INLINE std::enable_if< Internal::all_are_convertible< VarOrRVar, Args... >::value, Stage & >::type | never_partition (const VarOrRVar &x, Args &&...args) |
template<typename... Args> | |
HALIDE_NO_USER_CODE_INLINE std::enable_if< Internal::all_are_convertible< VarOrRVar, Args... >::value, Stage & >::type | always_partition (const VarOrRVar &x, Args &&...args) |
Stage & | rename (const VarOrRVar &old_name, const VarOrRVar &new_name) |
Stage | specialize (const Expr &condition) |
void | specialize_fail (const std::string &message) |
Stage & | gpu_threads (const VarOrRVar &thread_x, DeviceAPI device_api=DeviceAPI::Default_GPU) |
Stage & | gpu_threads (const VarOrRVar &thread_x, const VarOrRVar &thread_y, DeviceAPI device_api=DeviceAPI::Default_GPU) |
Stage & | gpu_threads (const VarOrRVar &thread_x, const VarOrRVar &thread_y, const VarOrRVar &thread_z, DeviceAPI device_api=DeviceAPI::Default_GPU) |
Stage & | gpu_lanes (const VarOrRVar &thread_x, DeviceAPI device_api=DeviceAPI::Default_GPU) |
Stage & | gpu_single_thread (DeviceAPI device_api=DeviceAPI::Default_GPU) |
Stage & | gpu_blocks (const VarOrRVar &block_x, DeviceAPI device_api=DeviceAPI::Default_GPU) |
Stage & | gpu_blocks (const VarOrRVar &block_x, const VarOrRVar &block_y, DeviceAPI device_api=DeviceAPI::Default_GPU) |
Stage & | gpu_blocks (const VarOrRVar &block_x, const VarOrRVar &block_y, const VarOrRVar &block_z, DeviceAPI device_api=DeviceAPI::Default_GPU) |
Stage & | gpu (const VarOrRVar &block_x, const VarOrRVar &thread_x, DeviceAPI device_api=DeviceAPI::Default_GPU) |
Stage & | gpu (const VarOrRVar &block_x, const VarOrRVar &block_y, const VarOrRVar &thread_x, const VarOrRVar &thread_y, DeviceAPI device_api=DeviceAPI::Default_GPU) |
Stage & | gpu (const VarOrRVar &block_x, const VarOrRVar &block_y, const VarOrRVar &block_z, const VarOrRVar &thread_x, const VarOrRVar &thread_y, const VarOrRVar &thread_z, DeviceAPI device_api=DeviceAPI::Default_GPU) |
Stage & | gpu_tile (const VarOrRVar &x, const VarOrRVar &bx, const VarOrRVar &tx, const Expr &x_size, TailStrategy tail=TailStrategy::Auto, DeviceAPI device_api=DeviceAPI::Default_GPU) |
Stage & | gpu_tile (const VarOrRVar &x, const VarOrRVar &tx, const Expr &x_size, TailStrategy tail=TailStrategy::Auto, DeviceAPI device_api=DeviceAPI::Default_GPU) |
Stage & | gpu_tile (const VarOrRVar &x, const VarOrRVar &y, const VarOrRVar &bx, const VarOrRVar &by, const VarOrRVar &tx, const VarOrRVar &ty, const Expr &x_size, const Expr &y_size, TailStrategy tail=TailStrategy::Auto, DeviceAPI device_api=DeviceAPI::Default_GPU) |
Stage & | gpu_tile (const VarOrRVar &x, const VarOrRVar &y, const VarOrRVar &tx, const VarOrRVar &ty, const Expr &x_size, const Expr &y_size, TailStrategy tail=TailStrategy::Auto, DeviceAPI device_api=DeviceAPI::Default_GPU) |
Stage & | gpu_tile (const VarOrRVar &x, const VarOrRVar &y, const VarOrRVar &z, const VarOrRVar &bx, const VarOrRVar &by, const VarOrRVar &bz, const VarOrRVar &tx, const VarOrRVar &ty, const VarOrRVar &tz, const Expr &x_size, const Expr &y_size, const Expr &z_size, TailStrategy tail=TailStrategy::Auto, DeviceAPI device_api=DeviceAPI::Default_GPU) |
Stage & | gpu_tile (const VarOrRVar &x, const VarOrRVar &y, const VarOrRVar &z, const VarOrRVar &tx, const VarOrRVar &ty, const VarOrRVar &tz, const Expr &x_size, const Expr &y_size, const Expr &z_size, TailStrategy tail=TailStrategy::Auto, DeviceAPI device_api=DeviceAPI::Default_GPU) |
Stage & | allow_race_conditions () |
Stage & | atomic (bool override_associativity_test=false) |
Stage & | hexagon (const VarOrRVar &x=Var::outermost()) |
Stage & | prefetch (const Func &f, const VarOrRVar &at, const VarOrRVar &from, Expr offset=1, PrefetchBoundStrategy strategy=PrefetchBoundStrategy::GuardWithIf) |
Stage & | prefetch (const Parameter &param, const VarOrRVar &at, const VarOrRVar &from, Expr offset=1, PrefetchBoundStrategy strategy=PrefetchBoundStrategy::GuardWithIf) |
template<typename T > | |
Stage & | prefetch (const T &image, const VarOrRVar &at, const VarOrRVar &from, Expr offset=1, PrefetchBoundStrategy strategy=PrefetchBoundStrategy::GuardWithIf) |
void | unscheduled () |
Assert that this stage has intentionally been given no schedule, and suppress the warning about unscheduled update definitions that would otherwise fire. | |
A single definition of a Func.
May be a pure or update definition.
|
inline |
Definition at line 93 of file Func.h.
References Halide::Internal::Definition::args(), Halide::Internal::Function::args(), Halide::Internal::Definition::defined(), and internal_assert.
|
inline |
Return the current StageSchedule associated with this Stage.
For introspection only: to modify schedule, use the Func interface.
Definition at line 106 of file Func.h.
References Halide::Internal::Definition::schedule().
Referenced by do_cost_model_schedule(), and Halide::Func::get_schedule().
std::string Halide::Stage::dump_argument_list | ( | ) | const |
Return a string describing the current var list taking into account all the splits, reorders, and tiles.
std::string Halide::Stage::name | ( | ) | const |
Return the name of this stage, e.g.
"f.update(2)"
Calling rfactor() on an associative update definition of a Func will split the update into an intermediate which computes the partial results and replaces the current update definition with a new definition which merges the partial results.
If called on an init/pure definition, this will throw an error. rfactor() will automatically infer the associative reduction operator and identity of the operator. If it can't prove the operation is associative or if it cannot find an identity for that operator, this will throw an error. In addition, commutativity of the operator is required if rfactor() is called on the inner dimension but excluding the outer dimensions.
rfactor() takes as input 'preserved', which is a list of <RVar, Var> pairs. The rvars not listed in 'preserved' are removed from the original Func and are lifted to the intermediate Func. The remaining rvars (the ones in 'preserved') are made pure in the intermediate Func. The intermediate Func's update definition inherits all scheduling directives (e.g. split, fuse, etc.) applied to the original Func's update definition. The loop order of the intermediate Func's update definition is the same as the original, although the RVars in 'preserved' are replaced by the new pure Vars. The loop order of the intermediate Func's init definition from innermost to outermost is the args' order of the original Func's init definition followed by the new pure Vars.
The intermediate Func also inherits storage order from the original Func with the new pure Vars added to the outermost.
For example, f.update(0).rfactor({{r.y, u}}) would rewrite a pipeline like this:
into a pipeline like this:
This has a variety of uses. You can use it to split computation of an associative reduction:
, which is equivalent to:
Stage & Halide::Stage::compute_with | ( | LoopLevel | loop_level, |
const std::vector< std::pair< VarOrRVar, LoopAlignStrategy > > & | align ) |
Schedule the iteration over this stage to be fused with another stage 's' from the outermost loop to a given LoopLevel.
'this' stage will be computed AFTER 's' in the innermost fused dimension. There should not be any dependencies between those two fused stages. If either of the stages being fused is a stage of an extern Func, this will throw an error.
Note that the two stages that are fused together should have the same exact schedule from the outermost to the innermost fused dimension, and the stage we are calling compute_with on should not have specializations, e.g. f2.compute_with(f1, x) is allowed only if f2 has no specializations.
Also, if a producer is desired to be computed at the fused loop level, the function passed to the compute_at() needs to be the "parent". Consider the following code:
To compute 'input' at the fused loop level at dimension y, we specify input.compute_at(g, y) instead of input.compute_at(f, y) since 'g' is the "parent" for this fused loop (i.e. 'g' is computed first before 'f' is computed). On the other hand, to compute 'input' at the innermost dimension of 'f', we specify input.compute_at(f, x) instead of input.compute_at(g, x) since the x dimension of 'f' is not fused (only the y dimension is).
Given the constraints, this has a variety of uses. Consider the following code:
This is equivalent to:
The sizes of the dimensions of the stages computed_with do not have to match. Consider the following code where 'g' is half the size of 'f':
This is equivalent to:
'align' specifies how the loop iteration of each dimension of the two stages being fused should be aligned in the fused loop nests (see LoopAlignStrategy for options). Consider the following loop nests:
If no alignment strategy is specified, the following loop nest will be generated:
Instead, these alignment strategies:
will produce the following loop nest:
LoopAlignStrategy::AlignStart on dimension z will shift the loop iteration of 'g' at dimension z so that its starting value matches that of 'f'. Likewise, LoopAlignStrategy::AlignEnd on dimension y will shift the loop iteration of 'g' at dimension y so that its end value matches that of 'f'.
Stage & Halide::Stage::compute_with | ( | LoopLevel | loop_level, |
LoopAlignStrategy | align = LoopAlignStrategy::Auto ) |
Stage & Halide::Stage::compute_with | ( | const Stage & | s, |
const VarOrRVar & | var, | ||
const std::vector< std::pair< VarOrRVar, LoopAlignStrategy > > & | align ) |
Stage & Halide::Stage::compute_with | ( | const Stage & | s, |
const VarOrRVar & | var, | ||
LoopAlignStrategy | align = LoopAlignStrategy::Auto ) |
Stage & Halide::Stage::split | ( | const VarOrRVar & | old, |
const VarOrRVar & | outer, | ||
const VarOrRVar & | inner, | ||
const Expr & | factor, | ||
TailStrategy | tail = TailStrategy::Auto ) |
Scheduling calls that control how the domain of this stage is traversed.
See the documentation for Func for the meanings.
Stage & Halide::Stage::fuse | ( | const VarOrRVar & | inner, |
const VarOrRVar & | outer, | ||
const VarOrRVar & | fused ) |
Referenced by do_cost_model_schedule().
Referenced by do_cost_model_schedule().
Referenced by do_cost_model_schedule().
Stage & Halide::Stage::parallel | ( | const VarOrRVar & | var, |
const Expr & | task_size, | ||
TailStrategy | tail = TailStrategy::Auto ) |
Stage & Halide::Stage::vectorize | ( | const VarOrRVar & | var, |
const Expr & | factor, | ||
TailStrategy | tail = TailStrategy::Auto ) |
Stage & Halide::Stage::unroll | ( | const VarOrRVar & | var, |
const Expr & | factor, | ||
TailStrategy | tail = TailStrategy::Auto ) |
Stage & Halide::Stage::never_partition_all | ( | ) |
Referenced by never_partition().
Stage & Halide::Stage::always_partition_all | ( | ) |
Referenced by always_partition().
Stage & Halide::Stage::tile | ( | const VarOrRVar & | x, |
const VarOrRVar & | y, | ||
const VarOrRVar & | xo, | ||
const VarOrRVar & | yo, | ||
const VarOrRVar & | xi, | ||
const VarOrRVar & | yi, | ||
const Expr & | xfactor, | ||
const Expr & | yfactor, | ||
TailStrategy | tail = TailStrategy::Auto ) |
Stage & Halide::Stage::tile | ( | const VarOrRVar & | x, |
const VarOrRVar & | y, | ||
const VarOrRVar & | xi, | ||
const VarOrRVar & | yi, | ||
const Expr & | xfactor, | ||
const Expr & | yfactor, | ||
TailStrategy | tail = TailStrategy::Auto ) |
Stage & Halide::Stage::tile | ( | const std::vector< VarOrRVar > & | previous, |
const std::vector< VarOrRVar > & | outers, | ||
const std::vector< VarOrRVar > & | inners, | ||
const std::vector< Expr > & | factors, | ||
const std::vector< TailStrategy > & | tails ) |
Stage & Halide::Stage::tile | ( | const std::vector< VarOrRVar > & | previous, |
const std::vector< VarOrRVar > & | outers, | ||
const std::vector< VarOrRVar > & | inners, | ||
const std::vector< Expr > & | factors, | ||
TailStrategy | tail = TailStrategy::Auto ) |
Stage & Halide::Stage::tile | ( | const std::vector< VarOrRVar > & | previous, |
const std::vector< VarOrRVar > & | inners, | ||
const std::vector< Expr > & | factors, | ||
TailStrategy | tail = TailStrategy::Auto ) |
Referenced by do_cost_model_schedule(), and reorder().
|
inline |
|
inline |
Definition at line 390 of file Func.h.
References never_partition().
|
inline |
Definition at line 397 of file Func.h.
References always_partition().
void Halide::Stage::specialize_fail | ( | const std::string & | message | ) |
Stage & Halide::Stage::gpu_threads | ( | const VarOrRVar & | thread_x, |
DeviceAPI | device_api = DeviceAPI::Default_GPU ) |
Stage & Halide::Stage::gpu_threads | ( | const VarOrRVar & | thread_x, |
const VarOrRVar & | thread_y, | ||
DeviceAPI | device_api = DeviceAPI::Default_GPU ) |
Stage & Halide::Stage::gpu_threads | ( | const VarOrRVar & | thread_x, |
const VarOrRVar & | thread_y, | ||
const VarOrRVar & | thread_z, | ||
DeviceAPI | device_api = DeviceAPI::Default_GPU ) |
Stage & Halide::Stage::gpu_lanes | ( | const VarOrRVar & | thread_x, |
DeviceAPI | device_api = DeviceAPI::Default_GPU ) |
Stage & Halide::Stage::gpu_single_thread | ( | DeviceAPI | device_api = DeviceAPI::Default_GPU | ) |
Stage & Halide::Stage::gpu_blocks | ( | const VarOrRVar & | block_x, |
DeviceAPI | device_api = DeviceAPI::Default_GPU ) |
Stage & Halide::Stage::gpu_blocks | ( | const VarOrRVar & | block_x, |
const VarOrRVar & | block_y, | ||
DeviceAPI | device_api = DeviceAPI::Default_GPU ) |
Stage & Halide::Stage::gpu_blocks | ( | const VarOrRVar & | block_x, |
const VarOrRVar & | block_y, | ||
const VarOrRVar & | block_z, | ||
DeviceAPI | device_api = DeviceAPI::Default_GPU ) |
Stage & Halide::Stage::gpu | ( | const VarOrRVar & | block_x, |
const VarOrRVar & | thread_x, | ||
DeviceAPI | device_api = DeviceAPI::Default_GPU ) |
Stage & Halide::Stage::gpu | ( | const VarOrRVar & | block_x, |
const VarOrRVar & | block_y, | ||
const VarOrRVar & | thread_x, | ||
const VarOrRVar & | thread_y, | ||
DeviceAPI | device_api = DeviceAPI::Default_GPU ) |
Stage & Halide::Stage::gpu | ( | const VarOrRVar & | block_x, |
const VarOrRVar & | block_y, | ||
const VarOrRVar & | block_z, | ||
const VarOrRVar & | thread_x, | ||
const VarOrRVar & | thread_y, | ||
const VarOrRVar & | thread_z, | ||
DeviceAPI | device_api = DeviceAPI::Default_GPU ) |
Stage & Halide::Stage::gpu_tile | ( | const VarOrRVar & | x, |
const VarOrRVar & | bx, | ||
const VarOrRVar & | tx, | ||
const Expr & | x_size, | ||
TailStrategy | tail = TailStrategy::Auto, | ||
DeviceAPI | device_api = DeviceAPI::Default_GPU ) |
Stage & Halide::Stage::gpu_tile | ( | const VarOrRVar & | x, |
const VarOrRVar & | tx, | ||
const Expr & | x_size, | ||
TailStrategy | tail = TailStrategy::Auto, | ||
DeviceAPI | device_api = DeviceAPI::Default_GPU ) |
Stage & Halide::Stage::gpu_tile | ( | const VarOrRVar & | x, |
const VarOrRVar & | y, | ||
const VarOrRVar & | bx, | ||
const VarOrRVar & | by, | ||
const VarOrRVar & | tx, | ||
const VarOrRVar & | ty, | ||
const Expr & | x_size, | ||
const Expr & | y_size, | ||
TailStrategy | tail = TailStrategy::Auto, | ||
DeviceAPI | device_api = DeviceAPI::Default_GPU ) |
Stage & Halide::Stage::gpu_tile | ( | const VarOrRVar & | x, |
const VarOrRVar & | y, | ||
const VarOrRVar & | tx, | ||
const VarOrRVar & | ty, | ||
const Expr & | x_size, | ||
const Expr & | y_size, | ||
TailStrategy | tail = TailStrategy::Auto, | ||
DeviceAPI | device_api = DeviceAPI::Default_GPU ) |
Stage & Halide::Stage::gpu_tile | ( | const VarOrRVar & | x, |
const VarOrRVar & | y, | ||
const VarOrRVar & | z, | ||
const VarOrRVar & | bx, | ||
const VarOrRVar & | by, | ||
const VarOrRVar & | bz, | ||
const VarOrRVar & | tx, | ||
const VarOrRVar & | ty, | ||
const VarOrRVar & | tz, | ||
const Expr & | x_size, | ||
const Expr & | y_size, | ||
const Expr & | z_size, | ||
TailStrategy | tail = TailStrategy::Auto, | ||
DeviceAPI | device_api = DeviceAPI::Default_GPU ) |
Stage & Halide::Stage::gpu_tile | ( | const VarOrRVar & | x, |
const VarOrRVar & | y, | ||
const VarOrRVar & | z, | ||
const VarOrRVar & | tx, | ||
const VarOrRVar & | ty, | ||
const VarOrRVar & | tz, | ||
const Expr & | x_size, | ||
const Expr & | y_size, | ||
const Expr & | z_size, | ||
TailStrategy | tail = TailStrategy::Auto, | ||
DeviceAPI | device_api = DeviceAPI::Default_GPU ) |
Stage & Halide::Stage::allow_race_conditions | ( | ) |
Stage & Halide::Stage::atomic | ( | bool | override_associativity_test = false | ) |
Stage & Halide::Stage::hexagon | ( | const VarOrRVar & | x = Var::outermost() | ) |
Stage & Halide::Stage::prefetch | ( | const Func & | f, |
const VarOrRVar & | at, | ||
const VarOrRVar & | from, | ||
Expr | offset = 1, | ||
PrefetchBoundStrategy | strategy = PrefetchBoundStrategy::GuardWithIf ) |
Referenced by prefetch().
Stage & Halide::Stage::prefetch | ( | const Parameter & | param, |
const VarOrRVar & | at, | ||
const VarOrRVar & | from, | ||
Expr | offset = 1, | ||
PrefetchBoundStrategy | strategy = PrefetchBoundStrategy::GuardWithIf ) |
|
inline |
Definition at line 468 of file Func.h.
References prefetch().
void Halide::Stage::unscheduled | ( | ) |
Assert that this stage has intentionally been given no schedule, and suppress the warning about unscheduled update definitions that would otherwise fire.
This counts as a schedule, so calling this twice on the same Stage will fail the assertion.