Halide
Halide::Stage Class Reference

A single definition of a Func. More...

#include <Func.h>

Public Member Functions

 Stage (Internal::Function f, Internal::Definition d, size_t stage_index)
 
const Internal::StageSchedule & get_schedule () const
 Return the current StageSchedule associated with this Stage. More...
 
std::string dump_argument_list () const
 Return a string describing the current var list taking into account all the splits, reorders, and tiles. More...
 
std::string name () const
 Return the name of this stage, e.g. More...
 
Func rfactor (std::vector< std::pair< RVar, Var >> preserved)
 Calling rfactor() on an associative update definition of a Func will split the update into an intermediate which computes the partial results and replaces the current update definition with a new definition which merges the partial results. More...
 
Func rfactor (const RVar &r, const Var &v)
 
Stage & compute_with (LoopLevel loop_level, const std::vector< std::pair< VarOrRVar, LoopAlignStrategy >> &align)
 Schedule the iteration over this stage to be fused with another stage 's' from outermost loop to a given LoopLevel. More...
 
Stage & compute_with (LoopLevel loop_level, LoopAlignStrategy align=LoopAlignStrategy::Auto)
 
Stage & compute_with (const Stage &s, const VarOrRVar &var, const std::vector< std::pair< VarOrRVar, LoopAlignStrategy >> &align)
 
Stage & compute_with (const Stage &s, const VarOrRVar &var, LoopAlignStrategy align=LoopAlignStrategy::Auto)
 
Stage & split (const VarOrRVar &old, const VarOrRVar &outer, const VarOrRVar &inner, const Expr &factor, TailStrategy tail=TailStrategy::Auto)
 Scheduling calls that control how the domain of this stage is traversed. More...
 
Stage & fuse (const VarOrRVar &inner, const VarOrRVar &outer, const VarOrRVar &fused)
 
Stage & serial (const VarOrRVar &var)
 
Stage & parallel (const VarOrRVar &var)
 
Stage & vectorize (const VarOrRVar &var)
 
Stage & unroll (const VarOrRVar &var)
 
Stage & parallel (const VarOrRVar &var, const Expr &task_size, TailStrategy tail=TailStrategy::Auto)
 
Stage & vectorize (const VarOrRVar &var, const Expr &factor, TailStrategy tail=TailStrategy::Auto)
 
Stage & unroll (const VarOrRVar &var, const Expr &factor, TailStrategy tail=TailStrategy::Auto)
 
Stage & tile (const VarOrRVar &x, const VarOrRVar &y, const VarOrRVar &xo, const VarOrRVar &yo, const VarOrRVar &xi, const VarOrRVar &yi, const Expr &xfactor, const Expr &yfactor, TailStrategy tail=TailStrategy::Auto)
 
Stage & tile (const VarOrRVar &x, const VarOrRVar &y, const VarOrRVar &xi, const VarOrRVar &yi, const Expr &xfactor, const Expr &yfactor, TailStrategy tail=TailStrategy::Auto)
 
Stage & tile (const std::vector< VarOrRVar > &previous, const std::vector< VarOrRVar > &outers, const std::vector< VarOrRVar > &inners, const std::vector< Expr > &factors, const std::vector< TailStrategy > &tails)
 
Stage & tile (const std::vector< VarOrRVar > &previous, const std::vector< VarOrRVar > &outers, const std::vector< VarOrRVar > &inners, const std::vector< Expr > &factors, TailStrategy tail=TailStrategy::Auto)
 
Stage & tile (const std::vector< VarOrRVar > &previous, const std::vector< VarOrRVar > &inners, const std::vector< Expr > &factors, TailStrategy tail=TailStrategy::Auto)
 
Stage & reorder (const std::vector< VarOrRVar > &vars)
 
template<typename... Args>
HALIDE_NO_USER_CODE_INLINE std::enable_if< Internal::all_are_convertible< VarOrRVar, Args... >::value, Stage & >::type reorder (const VarOrRVar &x, const VarOrRVar &y, Args &&... args)
 
Stage & rename (const VarOrRVar &old_name, const VarOrRVar &new_name)
 
Stage specialize (const Expr &condition)
 
void specialize_fail (const std::string &message)
 
Stage & gpu_threads (const VarOrRVar &thread_x, DeviceAPI device_api=DeviceAPI::Default_GPU)
 
Stage & gpu_threads (const VarOrRVar &thread_x, const VarOrRVar &thread_y, DeviceAPI device_api=DeviceAPI::Default_GPU)
 
Stage & gpu_threads (const VarOrRVar &thread_x, const VarOrRVar &thread_y, const VarOrRVar &thread_z, DeviceAPI device_api=DeviceAPI::Default_GPU)
 
Stage & gpu_lanes (const VarOrRVar &thread_x, DeviceAPI device_api=DeviceAPI::Default_GPU)
 
Stage & gpu_single_thread (DeviceAPI device_api=DeviceAPI::Default_GPU)
 
Stage & gpu_blocks (const VarOrRVar &block_x, DeviceAPI device_api=DeviceAPI::Default_GPU)
 
Stage & gpu_blocks (const VarOrRVar &block_x, const VarOrRVar &block_y, DeviceAPI device_api=DeviceAPI::Default_GPU)
 
Stage & gpu_blocks (const VarOrRVar &block_x, const VarOrRVar &block_y, const VarOrRVar &block_z, DeviceAPI device_api=DeviceAPI::Default_GPU)
 
Stage & gpu (const VarOrRVar &block_x, const VarOrRVar &thread_x, DeviceAPI device_api=DeviceAPI::Default_GPU)
 
Stage & gpu (const VarOrRVar &block_x, const VarOrRVar &block_y, const VarOrRVar &thread_x, const VarOrRVar &thread_y, DeviceAPI device_api=DeviceAPI::Default_GPU)
 
Stage & gpu (const VarOrRVar &block_x, const VarOrRVar &block_y, const VarOrRVar &block_z, const VarOrRVar &thread_x, const VarOrRVar &thread_y, const VarOrRVar &thread_z, DeviceAPI device_api=DeviceAPI::Default_GPU)
 
Stage & gpu_tile (const VarOrRVar &x, const VarOrRVar &bx, const VarOrRVar &tx, const Expr &x_size, TailStrategy tail=TailStrategy::Auto, DeviceAPI device_api=DeviceAPI::Default_GPU)
 
Stage & gpu_tile (const VarOrRVar &x, const VarOrRVar &tx, const Expr &x_size, TailStrategy tail=TailStrategy::Auto, DeviceAPI device_api=DeviceAPI::Default_GPU)
 
Stage & gpu_tile (const VarOrRVar &x, const VarOrRVar &y, const VarOrRVar &bx, const VarOrRVar &by, const VarOrRVar &tx, const VarOrRVar &ty, const Expr &x_size, const Expr &y_size, TailStrategy tail=TailStrategy::Auto, DeviceAPI device_api=DeviceAPI::Default_GPU)
 
Stage & gpu_tile (const VarOrRVar &x, const VarOrRVar &y, const VarOrRVar &tx, const VarOrRVar &ty, const Expr &x_size, const Expr &y_size, TailStrategy tail=TailStrategy::Auto, DeviceAPI device_api=DeviceAPI::Default_GPU)
 
Stage & gpu_tile (const VarOrRVar &x, const VarOrRVar &y, const VarOrRVar &z, const VarOrRVar &bx, const VarOrRVar &by, const VarOrRVar &bz, const VarOrRVar &tx, const VarOrRVar &ty, const VarOrRVar &tz, const Expr &x_size, const Expr &y_size, const Expr &z_size, TailStrategy tail=TailStrategy::Auto, DeviceAPI device_api=DeviceAPI::Default_GPU)
 
Stage & gpu_tile (const VarOrRVar &x, const VarOrRVar &y, const VarOrRVar &z, const VarOrRVar &tx, const VarOrRVar &ty, const VarOrRVar &tz, const Expr &x_size, const Expr &y_size, const Expr &z_size, TailStrategy tail=TailStrategy::Auto, DeviceAPI device_api=DeviceAPI::Default_GPU)
 
Stage & allow_race_conditions ()
 
Stage & atomic (bool override_associativity_test=false)
 
Stage & hexagon (const VarOrRVar &x=Var::outermost())
 
Stage & prefetch (const Func &f, const VarOrRVar &var, Expr offset=1, PrefetchBoundStrategy strategy=PrefetchBoundStrategy::GuardWithIf)
 
Stage & prefetch (const Internal::Parameter &param, const VarOrRVar &var, Expr offset=1, PrefetchBoundStrategy strategy=PrefetchBoundStrategy::GuardWithIf)
 
template<typename T >
Stage & prefetch (const T &image, VarOrRVar var, Expr offset=1, PrefetchBoundStrategy strategy=PrefetchBoundStrategy::GuardWithIf)
 
std::string source_location () const
 Attempt to get the source file and line where this stage was defined by parsing the process's own debug symbols. More...
 

Detailed Description

A single definition of a Func.

May be a pure or update definition.

Definition at line 69 of file Func.h.

Constructor & Destructor Documentation

◆ Stage()

Member Function Documentation

◆ get_schedule()

const Internal::StageSchedule& Halide::Stage::get_schedule ( ) const
inline

Return the current StageSchedule associated with this Stage.

For introspection only: to modify schedule, use the Func interface.

Definition at line 107 of file Func.h.

References Halide::Internal::Definition::schedule().

Referenced by do_cost_model_schedule(), and Halide::Func::get_schedule().

◆ dump_argument_list()

std::string Halide::Stage::dump_argument_list ( ) const

Return a string describing the current var list taking into account all the splits, reorders, and tiles.

◆ name()

std::string Halide::Stage::name ( ) const

Return the name of this stage, e.g. "f.update(2)".

◆ rfactor() [1/2]

Func Halide::Stage::rfactor ( std::vector< std::pair< RVar, Var >>  preserved)

Calling rfactor() on an associative update definition of a Func will split the update into an intermediate which computes the partial results and replaces the current update definition with a new definition which merges the partial results.

If called on an init/pure definition, this will throw an error. rfactor() will automatically infer the associative reduction operator and identity of the operator. If it can't prove the operation is associative or if it cannot find an identity for that operator, this will throw an error. In addition, commutativity of the operator is required if rfactor() is called on the inner dimension but excluding the outer dimensions.

rfactor() takes as input 'preserved', which is a list of <RVar, Var> pairs. The rvars not listed in 'preserved' are removed from the original Func and are lifted to the intermediate Func. The remaining rvars (the ones in 'preserved') are made pure in the intermediate Func. The intermediate Func's update definition inherits all scheduling directives (e.g. split, fuse, etc.) applied to the original Func's update definition. The loop order of the intermediate Func's update definition is the same as the original, although the RVars in 'preserved' are replaced by the new pure Vars. The loop order of the intermediate Func's init definition from innermost to outermost is the args' order of the original Func's init definition followed by the new pure Vars.

The intermediate Func also inherits storage order from the original Func with the new pure Vars added to the outermost.

For example, f.update(0).rfactor({{r.y, u}}) would rewrite a pipeline like this:

f(x, y) = 0;
f(x, y) += g(r.x, r.y);

into a pipeline like this:

f_intm(x, y, u) = 0;
f_intm(x, y, u) += g(r.x, u);
f(x, y) = 0;
f(x, y) += f_intm(x, y, r.y);

This has a variety of uses. You can use it to split computation of an associative reduction:

f(x, y) = 10;
RDom r(0, 96);
f(x, y) = max(f(x, y), g(x, y, r.x));
f.update(0).split(r.x, rxo, rxi, 8).reorder(y, x).parallel(x);
f.update(0).rfactor({{rxo, u}}).compute_root().parallel(u).update(0).parallel(u);

This is equivalent to:

parallel for u = 0 to 11:
  for y:
    for x:
      f_intm(x, y, u) = -inf
parallel for x:
  for y:
    parallel for u = 0 to 11:
      for rxi = 0 to 7:
        f_intm(x, y, u) = max(f_intm(x, y, u), g(8*u + rxi))
for y:
  for x:
    f(x, y) = 10
parallel for x:
  for y:
    for rxo = 0 to 11:
      f(x, y) = max(f(x, y), f_intm(x, y, rxo))

◆ rfactor() [2/2]

Func Halide::Stage::rfactor ( const RVar r,
const Var v 
)

◆ compute_with() [1/4]

Stage& Halide::Stage::compute_with ( LoopLevel  loop_level,
const std::vector< std::pair< VarOrRVar, LoopAlignStrategy >> &  align 
)

Schedule the iteration over this stage to be fused with another stage 's' from outermost loop to a given LoopLevel.

'this' stage will be computed AFTER 's' in the innermost fused dimension. There should not be any dependencies between those two fused stages. If either of the stages being fused is a stage of an extern Func, this will throw an error.

Note that the two stages that are fused together should have the same exact schedule from the outermost to the innermost fused dimension, and the stage we are calling compute_with on should not have specializations, e.g. f2.compute_with(f1, x) is allowed only if f2 has no specializations.

Also, if you want a producer to be computed at the fused loop level, the function passed to compute_at() must be the "parent". Consider the following code:

input(x, y) = x + y;
f(x, y) = input(x, y);
f(x, y) += 5;
g(x, y) = x - y;
g(x, y) += 10;
f.compute_with(g, y);
f.update().compute_with(g.update(), y);

To compute 'input' at the fused loop level at dimension y, we specify input.compute_at(g, y) instead of input.compute_at(f, y) since 'g' is the "parent" for this fused loop (i.e. 'g' is computed first before 'f' is computed). On the other hand, to compute 'input' at the innermost dimension of 'f', we specify input.compute_at(f, x) instead of input.compute_at(g, x) since the x dimension of 'f' is not fused (only the y dimension is).

Given the constraints, this has a variety of uses. Consider the following code:

f(x, y) = x + y;
g(x, y) = x - y;
h(x, y) = f(x, y) + g(x, y);
f.compute_root();
g.compute_root();
f.split(x, xo, xi, 8);
g.split(x, xo, xi, 8);
g.compute_with(f, xo);

This is equivalent to:

for y:
  for xo:
    for xi:
      f(8*xo + xi) = (8*xo + xi) + y
    for xi:
      g(8*xo + xi) = (8*xo + xi) - y
for y:
  for x:
    h(x, y) = f(x, y) + g(x, y)

The sizes of the dimensions of the stages being computed_with do not have to match. Consider the following code where 'g' is half the size of 'f':

Image<int> f_im(size, size), g_im(size/2, size/2);
input(x, y) = x + y;
f(x, y) = input(x, y);
g(x, y) = input(2*x, 2*y);
g.compute_with(f, y);
input.compute_at(f, y);
Pipeline({f, g}).realize({f_im, g_im});

This is equivalent to:

for y = 0 to size-1:
  for x = 0 to size-1:
    input(x, y) = x + y;
  for x = 0 to size-1:
    f(x, y) = input(x, y)
  for x = 0 to size/2-1:
    if (y < size/2-1):
      g(x, y) = input(2*x, 2*y)

'align' specifies how the loop iteration of each dimension of the two stages being fused should be aligned in the fused loop nests (see LoopAlignStrategy for options). Consider the following loop nests:

for z = f_min_z to f_max_z:
  for y = f_min_y to f_max_y:
    for x = f_min_x to f_max_x:
      f(x, y, z) = x + y + z
for z = g_min_z to g_max_z:
  for y = g_min_y to g_max_y:
    for x = g_min_x to g_max_x:
      g(x, y, z) = x - y - z

If no alignment strategy is specified, the following loop nest will be generated:

for z = min(f_min_z, g_min_z) to max(f_max_z, g_max_z):
  for y = min(f_min_y, g_min_y) to max(f_max_y, g_max_y):
    for x = f_min_x to f_max_x:
      if (f_min_z <= z <= f_max_z):
        if (f_min_y <= y <= f_max_y):
          f(x, y, z) = x + y + z
    for x = g_min_x to g_max_x:
      if (g_min_z <= z <= g_max_z):
        if (g_min_y <= y <= g_max_y):
          g(x, y, z) = x - y - z

Instead, these alignment strategies:

g.compute_with(f, y, {{z, LoopAlignStrategy::AlignStart}, {y, LoopAlignStrategy::AlignEnd}});

will produce the following loop nest:

f_loop_min_z = f_min_z
f_loop_max_z = max(f_max_z, (f_min_z - g_min_z) + g_max_z)
for z = f_min_z to f_loop_max_z:
  f_loop_min_y = min(f_min_y, (f_max_y - g_max_y) + g_min_y)
  f_loop_max_y = f_max_y
  for y = f_loop_min_y to f_loop_max_y:
    for x = f_min_x to f_max_x:
      if (f_loop_min_z <= z <= f_loop_max_z):
        if (f_loop_min_y <= y <= f_loop_max_y):
          f(x, y, z) = x + y + z
    for x = g_min_x to g_max_x:
      g_shift_z = g_min_z - f_loop_min_z
      g_shift_y = g_max_y - f_loop_max_y
      if (g_min_z <= (z + g_shift_z) <= g_max_z):
        if (g_min_y <= (y + g_shift_y) <= g_max_y):
          g(x, y + g_shift_y, z + g_shift_z) = x - (y + g_shift_y) - (z + g_shift_z)

LoopAlignStrategy::AlignStart on dimension z will shift the loop iteration of 'g' at dimension z so that its starting value matches that of 'f'. Likewise, LoopAlignStrategy::AlignEnd on dimension y will shift the loop iteration of 'g' at dimension y so that its end value matches that of 'f'.

◆ compute_with() [2/4]

Stage& Halide::Stage::compute_with ( LoopLevel  loop_level,
LoopAlignStrategy  align = LoopAlignStrategy::Auto 
)

◆ compute_with() [3/4]

Stage& Halide::Stage::compute_with ( const Stage s,
const VarOrRVar var,
const std::vector< std::pair< VarOrRVar, LoopAlignStrategy >> &  align 
)

◆ compute_with() [4/4]

Stage& Halide::Stage::compute_with ( const Stage s,
const VarOrRVar var,
LoopAlignStrategy  align = LoopAlignStrategy::Auto 
)

◆ split()

Stage& Halide::Stage::split ( const VarOrRVar old,
const VarOrRVar outer,
const VarOrRVar inner,
const Expr factor,
TailStrategy  tail = TailStrategy::Auto 
)

Scheduling calls that control how the domain of this stage is traversed.

See the documentation for Func for the meanings.

◆ fuse()

Stage& Halide::Stage::fuse ( const VarOrRVar inner,
const VarOrRVar outer,
const VarOrRVar fused 
)

◆ serial()

Stage& Halide::Stage::serial ( const VarOrRVar var)

◆ parallel() [1/2]

Stage& Halide::Stage::parallel ( const VarOrRVar var)

Referenced by do_cost_model_schedule().

◆ vectorize() [1/2]

Stage& Halide::Stage::vectorize ( const VarOrRVar var)

◆ unroll() [1/2]

Stage& Halide::Stage::unroll ( const VarOrRVar var)

◆ parallel() [2/2]

Stage& Halide::Stage::parallel ( const VarOrRVar var,
const Expr task_size,
TailStrategy  tail = TailStrategy::Auto 
)

◆ vectorize() [2/2]

Stage& Halide::Stage::vectorize ( const VarOrRVar var,
const Expr factor,
TailStrategy  tail = TailStrategy::Auto 
)

◆ unroll() [2/2]

Stage& Halide::Stage::unroll ( const VarOrRVar var,
const Expr factor,
TailStrategy  tail = TailStrategy::Auto 
)

◆ tile() [1/5]

Stage& Halide::Stage::tile ( const VarOrRVar x,
const VarOrRVar y,
const VarOrRVar xo,
const VarOrRVar yo,
const VarOrRVar xi,
const VarOrRVar yi,
const Expr xfactor,
const Expr yfactor,
TailStrategy  tail = TailStrategy::Auto 
)

◆ tile() [2/5]

Stage& Halide::Stage::tile ( const VarOrRVar x,
const VarOrRVar y,
const VarOrRVar xi,
const VarOrRVar yi,
const Expr xfactor,
const Expr yfactor,
TailStrategy  tail = TailStrategy::Auto 
)

◆ tile() [3/5]

Stage& Halide::Stage::tile ( const std::vector< VarOrRVar > &  previous,
const std::vector< VarOrRVar > &  outers,
const std::vector< VarOrRVar > &  inners,
const std::vector< Expr > &  factors,
const std::vector< TailStrategy > &  tails 
)

◆ tile() [4/5]

Stage& Halide::Stage::tile ( const std::vector< VarOrRVar > &  previous,
const std::vector< VarOrRVar > &  outers,
const std::vector< VarOrRVar > &  inners,
const std::vector< Expr > &  factors,
TailStrategy  tail = TailStrategy::Auto 
)

◆ tile() [5/5]

Stage& Halide::Stage::tile ( const std::vector< VarOrRVar > &  previous,
const std::vector< VarOrRVar > &  inners,
const std::vector< Expr > &  factors,
TailStrategy  tail = TailStrategy::Auto 
)

◆ reorder() [1/2]

Stage& Halide::Stage::reorder ( const std::vector< VarOrRVar > &  vars)

Referenced by do_cost_model_schedule(), and reorder().

◆ reorder() [2/2]

template<typename... Args>
HALIDE_NO_USER_CODE_INLINE std::enable_if<Internal::all_are_convertible<VarOrRVar, Args...>::value, Stage &>::type Halide::Stage::reorder ( const VarOrRVar x,
const VarOrRVar y,
Args &&...  args 
)
inline

Definition at line 378 of file Func.h.

References reorder().

◆ rename()

Stage& Halide::Stage::rename ( const VarOrRVar old_name,
const VarOrRVar new_name 
)

◆ specialize()

Stage Halide::Stage::specialize ( const Expr condition)

◆ specialize_fail()

void Halide::Stage::specialize_fail ( const std::string &  message)

◆ gpu_threads() [1/3]

Stage& Halide::Stage::gpu_threads ( const VarOrRVar thread_x,
DeviceAPI  device_api = DeviceAPI::Default_GPU 
)

◆ gpu_threads() [2/3]

Stage& Halide::Stage::gpu_threads ( const VarOrRVar thread_x,
const VarOrRVar thread_y,
DeviceAPI  device_api = DeviceAPI::Default_GPU 
)

◆ gpu_threads() [3/3]

Stage& Halide::Stage::gpu_threads ( const VarOrRVar thread_x,
const VarOrRVar thread_y,
const VarOrRVar thread_z,
DeviceAPI  device_api = DeviceAPI::Default_GPU 
)

◆ gpu_lanes()

Stage& Halide::Stage::gpu_lanes ( const VarOrRVar thread_x,
DeviceAPI  device_api = DeviceAPI::Default_GPU 
)

◆ gpu_single_thread()

Stage& Halide::Stage::gpu_single_thread ( DeviceAPI  device_api = DeviceAPI::Default_GPU)

◆ gpu_blocks() [1/3]

Stage& Halide::Stage::gpu_blocks ( const VarOrRVar block_x,
DeviceAPI  device_api = DeviceAPI::Default_GPU 
)

◆ gpu_blocks() [2/3]

Stage& Halide::Stage::gpu_blocks ( const VarOrRVar block_x,
const VarOrRVar block_y,
DeviceAPI  device_api = DeviceAPI::Default_GPU 
)

◆ gpu_blocks() [3/3]

Stage& Halide::Stage::gpu_blocks ( const VarOrRVar block_x,
const VarOrRVar block_y,
const VarOrRVar block_z,
DeviceAPI  device_api = DeviceAPI::Default_GPU 
)

◆ gpu() [1/3]

Stage& Halide::Stage::gpu ( const VarOrRVar block_x,
const VarOrRVar thread_x,
DeviceAPI  device_api = DeviceAPI::Default_GPU 
)

◆ gpu() [2/3]

Stage& Halide::Stage::gpu ( const VarOrRVar block_x,
const VarOrRVar block_y,
const VarOrRVar thread_x,
const VarOrRVar thread_y,
DeviceAPI  device_api = DeviceAPI::Default_GPU 
)

◆ gpu() [3/3]

Stage& Halide::Stage::gpu ( const VarOrRVar block_x,
const VarOrRVar block_y,
const VarOrRVar block_z,
const VarOrRVar thread_x,
const VarOrRVar thread_y,
const VarOrRVar thread_z,
DeviceAPI  device_api = DeviceAPI::Default_GPU 
)

◆ gpu_tile() [1/6]

Stage& Halide::Stage::gpu_tile ( const VarOrRVar x,
const VarOrRVar bx,
const VarOrRVar tx,
const Expr x_size,
TailStrategy  tail = TailStrategy::Auto,
DeviceAPI  device_api = DeviceAPI::Default_GPU 
)

◆ gpu_tile() [2/6]

Stage& Halide::Stage::gpu_tile ( const VarOrRVar x,
const VarOrRVar tx,
const Expr x_size,
TailStrategy  tail = TailStrategy::Auto,
DeviceAPI  device_api = DeviceAPI::Default_GPU 
)

◆ gpu_tile() [3/6]

Stage& Halide::Stage::gpu_tile ( const VarOrRVar x,
const VarOrRVar y,
const VarOrRVar bx,
const VarOrRVar by,
const VarOrRVar tx,
const VarOrRVar ty,
const Expr x_size,
const Expr y_size,
TailStrategy  tail = TailStrategy::Auto,
DeviceAPI  device_api = DeviceAPI::Default_GPU 
)

◆ gpu_tile() [4/6]

Stage& Halide::Stage::gpu_tile ( const VarOrRVar x,
const VarOrRVar y,
const VarOrRVar tx,
const VarOrRVar ty,
const Expr x_size,
const Expr y_size,
TailStrategy  tail = TailStrategy::Auto,
DeviceAPI  device_api = DeviceAPI::Default_GPU 
)

◆ gpu_tile() [5/6]

Stage& Halide::Stage::gpu_tile ( const VarOrRVar x,
const VarOrRVar y,
const VarOrRVar z,
const VarOrRVar bx,
const VarOrRVar by,
const VarOrRVar bz,
const VarOrRVar tx,
const VarOrRVar ty,
const VarOrRVar tz,
const Expr x_size,
const Expr y_size,
const Expr z_size,
TailStrategy  tail = TailStrategy::Auto,
DeviceAPI  device_api = DeviceAPI::Default_GPU 
)

◆ gpu_tile() [6/6]

Stage& Halide::Stage::gpu_tile ( const VarOrRVar x,
const VarOrRVar y,
const VarOrRVar z,
const VarOrRVar tx,
const VarOrRVar ty,
const VarOrRVar tz,
const Expr x_size,
const Expr y_size,
const Expr z_size,
TailStrategy  tail = TailStrategy::Auto,
DeviceAPI  device_api = DeviceAPI::Default_GPU 
)

◆ allow_race_conditions()

Stage& Halide::Stage::allow_race_conditions ( )

◆ atomic()

Stage& Halide::Stage::atomic ( bool  override_associativity_test = false)

◆ hexagon()

Stage& Halide::Stage::hexagon ( const VarOrRVar x = Var::outermost())

◆ prefetch() [1/3]

Stage& Halide::Stage::prefetch ( const Func f,
const VarOrRVar var,
Expr  offset = 1,
PrefetchBoundStrategy  strategy = PrefetchBoundStrategy::GuardWithIf 
)

Referenced by prefetch().

◆ prefetch() [2/3]

Stage& Halide::Stage::prefetch ( const Internal::Parameter param,
const VarOrRVar var,
Expr  offset = 1,
PrefetchBoundStrategy  strategy = PrefetchBoundStrategy::GuardWithIf 
)

◆ prefetch() [3/3]

template<typename T >
Stage& Halide::Stage::prefetch ( const T &  image,
VarOrRVar  var,
Expr  offset = 1,
PrefetchBoundStrategy  strategy = PrefetchBoundStrategy::GuardWithIf 
)
inline

Definition at line 448 of file Func.h.

References prefetch().

◆ source_location()

std::string Halide::Stage::source_location ( ) const

Attempt to get the source file and line where this stage was defined by parsing the process's own debug symbols.

Returns an empty string if no debug symbols were found or the debug symbols were not understood. Works on OS X and Linux only.


The documentation for this class was generated from the following file: Func.h
Halide::Stage::parallel
Stage & parallel(const VarOrRVar &var)
Halide::LoopAlignStrategy::AlignEnd
@ AlignEnd
Shift the end of the fused loops to align.
Halide::min
Expr min(const FuncRef &a, const FuncRef &b)
Explicit overloads of min and max for FuncRef.
Definition: Func.h:577
Halide::LoopAlignStrategy::AlignStart
@ AlignStart
Shift the start of the fused loops to align.
Halide::max
Expr max(const FuncRef &a, const FuncRef &b)
Definition: Func.h:580