A single definition of a Func. More...

#include <Func.h>

Public Member Functions
	Stage (Internal::Function f, Internal::Definition d, size_t stage_index)

const Internal::StageSchedule &	get_schedule () const
	Return the current StageSchedule associated with this Stage.

std::string	dump_argument_list () const
	Return a string describing the current var list taking into account all the splits, reorders, and tiles.

std::string	name () const
	Return the name of this stage, e.g. "f.update(2)".

Func	rfactor (std::vector< std::pair< RVar, Var > > preserved)
	Calling rfactor() on an associative update definition of a Func will split the update into an intermediate which computes the partial results and replaces the current update definition with a new definition which merges the partial results.

Func	rfactor (const RVar &r, const Var &v)

Stage &	compute_with (LoopLevel loop_level, const std::vector< std::pair< VarOrRVar, LoopAlignStrategy > > &align)
	Schedule the iteration over this stage to be fused with another stage 's' from the outermost loop to a given LoopLevel.

Stage &	compute_with (LoopLevel loop_level, LoopAlignStrategy align=LoopAlignStrategy::Auto)

Stage &	compute_with (const Stage &s, const VarOrRVar &var, const std::vector< std::pair< VarOrRVar, LoopAlignStrategy > > &align)

Stage &	compute_with (const Stage &s, const VarOrRVar &var, LoopAlignStrategy align=LoopAlignStrategy::Auto)

Stage &	split (const VarOrRVar &old, const VarOrRVar &outer, const VarOrRVar &inner, const Expr &factor, TailStrategy tail=TailStrategy::Auto)
	Scheduling calls that control how the domain of this stage is traversed.

Stage &	fuse (const VarOrRVar &inner, const VarOrRVar &outer, const VarOrRVar &fused)

Stage &	serial (const VarOrRVar &var)

Stage &	parallel (const VarOrRVar &var)

Stage &	vectorize (const VarOrRVar &var)

Stage &	unroll (const VarOrRVar &var)

Stage &	parallel (const VarOrRVar &var, const Expr &task_size, TailStrategy tail=TailStrategy::Auto)

Stage &	vectorize (const VarOrRVar &var, const Expr &factor, TailStrategy tail=TailStrategy::Auto)

Stage &	unroll (const VarOrRVar &var, const Expr &factor, TailStrategy tail=TailStrategy::Auto)

Stage &	partition (const VarOrRVar &var, Partition partition_policy)

Stage &	never_partition_all ()

Stage &	never_partition (const std::vector< VarOrRVar > &vars)

Stage &	always_partition_all ()

Stage &	always_partition (const std::vector< VarOrRVar > &vars)

Stage &	tile (const VarOrRVar &x, const VarOrRVar &y, const VarOrRVar &xo, const VarOrRVar &yo, const VarOrRVar &xi, const VarOrRVar &yi, const Expr &xfactor, const Expr &yfactor, TailStrategy tail=TailStrategy::Auto)

Stage &	tile (const VarOrRVar &x, const VarOrRVar &y, const VarOrRVar &xi, const VarOrRVar &yi, const Expr &xfactor, const Expr &yfactor, TailStrategy tail=TailStrategy::Auto)

Stage &	tile (const std::vector< VarOrRVar > &previous, const std::vector< VarOrRVar > &outers, const std::vector< VarOrRVar > &inners, const std::vector< Expr > &factors, const std::vector< TailStrategy > &tails)

Stage &	tile (const std::vector< VarOrRVar > &previous, const std::vector< VarOrRVar > &outers, const std::vector< VarOrRVar > &inners, const std::vector< Expr > &factors, TailStrategy tail=TailStrategy::Auto)

Stage &	tile (const std::vector< VarOrRVar > &previous, const std::vector< VarOrRVar > &inners, const std::vector< Expr > &factors, TailStrategy tail=TailStrategy::Auto)

Stage &	reorder (const std::vector< VarOrRVar > &vars)

template<typename... Args>
HALIDE_NO_USER_CODE_INLINE std::enable_if< Internal::all_are_convertible< VarOrRVar, Args... >::value, Stage & >::type	reorder (const VarOrRVar &x, const VarOrRVar &y, Args &&...args)

template<typename... Args>
HALIDE_NO_USER_CODE_INLINE std::enable_if< Internal::all_are_convertible< VarOrRVar, Args... >::value, Stage & >::type	never_partition (const VarOrRVar &x, Args &&...args)

template<typename... Args>
HALIDE_NO_USER_CODE_INLINE std::enable_if< Internal::all_are_convertible< VarOrRVar, Args... >::value, Stage & >::type	always_partition (const VarOrRVar &x, Args &&...args)

Stage &	rename (const VarOrRVar &old_name, const VarOrRVar &new_name)

Stage	specialize (const Expr &condition)

void	specialize_fail (const std::string &message)

Stage &	gpu_threads (const VarOrRVar &thread_x, DeviceAPI device_api=DeviceAPI::Default_GPU)

Stage &	gpu_threads (const VarOrRVar &thread_x, const VarOrRVar &thread_y, DeviceAPI device_api=DeviceAPI::Default_GPU)

Stage &	gpu_threads (const VarOrRVar &thread_x, const VarOrRVar &thread_y, const VarOrRVar &thread_z, DeviceAPI device_api=DeviceAPI::Default_GPU)

Stage &	gpu_lanes (const VarOrRVar &thread_x, DeviceAPI device_api=DeviceAPI::Default_GPU)

Stage &	gpu_single_thread (DeviceAPI device_api=DeviceAPI::Default_GPU)

Stage &	gpu_blocks (const VarOrRVar &block_x, DeviceAPI device_api=DeviceAPI::Default_GPU)

Stage &	gpu_blocks (const VarOrRVar &block_x, const VarOrRVar &block_y, DeviceAPI device_api=DeviceAPI::Default_GPU)

Stage &	gpu_blocks (const VarOrRVar &block_x, const VarOrRVar &block_y, const VarOrRVar &block_z, DeviceAPI device_api=DeviceAPI::Default_GPU)

Stage &	gpu (const VarOrRVar &block_x, const VarOrRVar &thread_x, DeviceAPI device_api=DeviceAPI::Default_GPU)

Stage &	gpu (const VarOrRVar &block_x, const VarOrRVar &block_y, const VarOrRVar &thread_x, const VarOrRVar &thread_y, DeviceAPI device_api=DeviceAPI::Default_GPU)

Stage &	gpu (const VarOrRVar &block_x, const VarOrRVar &block_y, const VarOrRVar &block_z, const VarOrRVar &thread_x, const VarOrRVar &thread_y, const VarOrRVar &thread_z, DeviceAPI device_api=DeviceAPI::Default_GPU)

Stage &	gpu_tile (const VarOrRVar &x, const VarOrRVar &bx, const VarOrRVar &tx, const Expr &x_size, TailStrategy tail=TailStrategy::Auto, DeviceAPI device_api=DeviceAPI::Default_GPU)

Stage &	gpu_tile (const VarOrRVar &x, const VarOrRVar &tx, const Expr &x_size, TailStrategy tail=TailStrategy::Auto, DeviceAPI device_api=DeviceAPI::Default_GPU)

Stage &	gpu_tile (const VarOrRVar &x, const VarOrRVar &y, const VarOrRVar &bx, const VarOrRVar &by, const VarOrRVar &tx, const VarOrRVar &ty, const Expr &x_size, const Expr &y_size, TailStrategy tail=TailStrategy::Auto, DeviceAPI device_api=DeviceAPI::Default_GPU)

Stage &	gpu_tile (const VarOrRVar &x, const VarOrRVar &y, const VarOrRVar &tx, const VarOrRVar &ty, const Expr &x_size, const Expr &y_size, TailStrategy tail=TailStrategy::Auto, DeviceAPI device_api=DeviceAPI::Default_GPU)

Stage &	gpu_tile (const VarOrRVar &x, const VarOrRVar &y, const VarOrRVar &z, const VarOrRVar &bx, const VarOrRVar &by, const VarOrRVar &bz, const VarOrRVar &tx, const VarOrRVar &ty, const VarOrRVar &tz, const Expr &x_size, const Expr &y_size, const Expr &z_size, TailStrategy tail=TailStrategy::Auto, DeviceAPI device_api=DeviceAPI::Default_GPU)

Stage &	gpu_tile (const VarOrRVar &x, const VarOrRVar &y, const VarOrRVar &z, const VarOrRVar &tx, const VarOrRVar &ty, const VarOrRVar &tz, const Expr &x_size, const Expr &y_size, const Expr &z_size, TailStrategy tail=TailStrategy::Auto, DeviceAPI device_api=DeviceAPI::Default_GPU)

Stage &	allow_race_conditions ()

Stage &	atomic (bool override_associativity_test=false)

Stage &	hexagon (const VarOrRVar &x=Var::outermost())

Stage &	prefetch (const Func &f, const VarOrRVar &at, const VarOrRVar &from, Expr offset=1, PrefetchBoundStrategy strategy=PrefetchBoundStrategy::GuardWithIf)

Stage &	prefetch (const Parameter &param, const VarOrRVar &at, const VarOrRVar &from, Expr offset=1, PrefetchBoundStrategy strategy=PrefetchBoundStrategy::GuardWithIf)

template<typename T >
Stage &	prefetch (const T &image, const VarOrRVar &at, const VarOrRVar &from, Expr offset=1, PrefetchBoundStrategy strategy=PrefetchBoundStrategy::GuardWithIf)

void	unscheduled ()
	Assert that this stage has intentionally been given no schedule, and suppress the warning about unscheduled update definitions that would otherwise fire.

Detailed Description

A single definition of a Func.

May be a pure or update definition.

Definition at line 69 of file Func.h.

Constructor & Destructor Documentation

◆ Stage()

Halide::Stage::Stage	(	Internal::Function	f,
		Internal::Definition	d,
		size_t	stage_index )

inline

Definition at line 93 of file Func.h.

References Halide::Internal::Definition::args(), Halide::Internal::Function::args(), Halide::Internal::Definition::defined(), and internal_assert.

Member Function Documentation

◆ get_schedule()

const Internal::StageSchedule & Halide::Stage::get_schedule ( ) const

inline

Return the current StageSchedule associated with this Stage.

For introspection only: to modify the schedule, use the Func interface.

Definition at line 106 of file Func.h.

References Halide::Internal::Definition::schedule().

Referenced by do_cost_model_schedule(), and Halide::Func::get_schedule().

◆ dump_argument_list()

std::string Halide::Stage::dump_argument_list ( ) const

Return a string describing the current var list taking into account all the splits, reorders, and tiles.

◆ name()

std::string Halide::Stage::name ( ) const

Return the name of this stage, e.g.

"f.update(2)"

◆ rfactor() [1/2]

Func Halide::Stage::rfactor ( std::vector< std::pair< RVar, Var > > preserved )

Calling rfactor() on an associative update definition of a Func will split the update into an intermediate which computes the partial results and replaces the current update definition with a new definition which merges the partial results.

If called on an init/pure definition, this will throw an error. rfactor() will automatically infer the associative reduction operator and identity of the operator. If it can't prove the operation is associative or if it cannot find an identity for that operator, this will throw an error. In addition, commutativity of the operator is required if rfactor() is called on the inner dimension while excluding the outer dimensions.

rfactor() takes as input 'preserved', which is a list of <RVar, Var> pairs. The rvars not listed in 'preserved' are removed from the original Func and are lifted to the intermediate Func. The remaining rvars (the ones in 'preserved') are made pure in the intermediate Func. The intermediate Func's update definition inherits all scheduling directives (e.g. split, fuse, etc.) applied to the original Func's update definition. The loop order of the intermediate Func's update definition is the same as the original, although the RVars in 'preserved' are replaced by the new pure Vars. The loop order of the intermediate Func's init definition from innermost to outermost is the args' order of the original Func's init definition followed by the new pure Vars.

The intermediate Func also inherits storage order from the original Func with the new pure Vars added to the outermost.

For example, f.update(0).rfactor({{r.y, u}}) would rewrite a pipeline like this:

f(x, y) = 0;

f(x, y) += g(r.x, r.y);

into a pipeline like this:

f_intm(x, y, u) = 0;
f_intm(x, y, u) += g(r.x, u);
 
f(x, y) = 0;
f(x, y) += f_intm(x, y, r.y);

This has a variety of uses. You can use it to split computation of an associative reduction:

f(x, y) = 10;
RDom r(0, 96);
f(x, y) = max(f(x, y), g(x, y, r.x));
f.update(0).split(r.x, rxo, rxi, 8).reorder(y, x).parallel(x);
f.update(0).rfactor({{rxo, u}}).compute_root().parallel(u).update(0).parallel(u);

which is equivalent to:

parallel for u = 0 to 11:
  for y:
    for x:
      f_intm(x, y, u) = -inf
parallel for x:
  for y:
    parallel for u = 0 to 11:
      for rxi = 0 to 7:
        f_intm(x, y, u) = max(f_intm(x, y, u), g(x, y, 8*u + rxi))
for y:
  for x:
    f(x, y) = 10
parallel for x:
  for y:
    for rxo = 0 to 11:
      f(x, y) = max(f(x, y), f_intm(x, y, rxo))

◆ rfactor() [2/2]

Func Halide::Stage::rfactor	(	const RVar &	r,
		const Var &	v )

◆ compute_with() [1/4]

Stage & Halide::Stage::compute_with	(	LoopLevel	loop_level,
		const std::vector< std::pair< VarOrRVar, LoopAlignStrategy > > &	align )

Schedule the iteration over this stage to be fused with another stage 's' from the outermost loop to a given LoopLevel.

'this' stage will be computed AFTER 's' in the innermost fused dimension. There should not be any dependencies between those two fused stages. If either of the stages being fused is a stage of an extern Func, this will throw an error.

Note that the two stages that are fused together should have the exact same schedule from the outermost to the innermost fused dimension, and the stage we are calling compute_with on should not have specializations, e.g. f2.compute_with(f1, x) is allowed only if f2 has no specializations.

Also, if a producer is desired to be computed at the fused loop level, the function passed to compute_at() needs to be the "parent". Consider the following code:

input(x, y) = x + y;
f(x, y) = input(x, y);
f(x, y) += 5;
g(x, y) = x - y;
g(x, y) += 10;
f.compute_with(g, y);
f.update().compute_with(g.update(), y);

To compute 'input' at the fused loop level at dimension y, we specify input.compute_at(g, y) instead of input.compute_at(f, y) since 'g' is the "parent" for this fused loop (i.e. 'g' is computed first before 'f' is computed). On the other hand, to compute 'input' at the innermost dimension of 'f', we specify input.compute_at(f, x) instead of input.compute_at(g, x) since the x dimension of 'f' is not fused (only the y dimension is).

Given the constraints, this has a variety of uses. Consider the following code:

f(x, y) = x + y;
g(x, y) = x - y;
h(x, y) = f(x, y) + g(x, y);
f.compute_root();
g.compute_root();
f.split(x, xo, xi, 8);
g.split(x, xo, xi, 8);
g.compute_with(f, xo);

This is equivalent to:

for y:
  for xo:
    for xi:
      f(8*xo + xi) = (8*xo + xi) + y
    for xi:
      g(8*xo + xi) = (8*xo + xi) - y
for y:
  for x:
    h(x, y) = f(x, y) + g(x, y)

The sizes of the dimensions of the stages computed_with do not have to match. Consider the following code where 'g' is half the size of 'f':

Image<int> f_im(size, size), g_im(size/2, size/2);
input(x, y) = x + y;
f(x, y) = input(x, y);
g(x, y) = input(2*x, 2*y);
g.compute_with(f, y);
input.compute_at(f, y);
Pipeline({f, g}).realize({f_im, g_im});

This is equivalent to:

for y = 0 to size-1:
  for x = 0 to size-1:
    input(x, y) = x + y;
  for x = 0 to size-1:
    f(x, y) = input(x, y)
  for x = 0 to size/2-1:
    if (y <= size/2-1):
      g(x, y) = input(2*x, 2*y)

'align' specifies how the loop iteration of each dimension of the two stages being fused should be aligned in the fused loop nests (see LoopAlignStrategy for options). Consider the following loop nests:

for z = f_min_z to f_max_z:
  for y = f_min_y to f_max_y:
    for x = f_min_x to f_max_x:
      f(x, y, z) = x + y + z
for z = g_min_z to g_max_z:
  for y = g_min_y to g_max_y:
    for x = g_min_x to g_max_x:
      g(x, y, z) = x - y - z

If no alignment strategy is specified, the following loop nest will be generated:

for z = min(f_min_z, g_min_z) to max(f_max_z, g_max_z):
  for y = min(f_min_y, g_min_y) to max(f_max_y, g_max_y):
    for x = f_min_x to f_max_x:
      if (f_min_z <= z <= f_max_z):
        if (f_min_y <= y <= f_max_y):
          f(x, y, z) = x + y + z
    for x = g_min_x to g_max_x:
      if (g_min_z <= z <= g_max_z):
        if (g_min_y <= y <= g_max_y):
          g(x, y, z) = x - y - z

Instead, these alignment strategies:

g.compute_with(f, y, {{z, LoopAlignStrategy::AlignStart}, {y, LoopAlignStrategy::AlignEnd}});

(Halide::LoopAlignStrategy::AlignStart shifts the start of the fused loops to align; Halide::LoopAlignStrategy::AlignEnd shifts the end of the fused loops to align.)

will produce the following loop nest:

f_loop_min_z = f_min_z
f_loop_max_z = max(f_max_z, (f_min_z - g_min_z) + g_max_z)
for z = f_min_z to f_loop_max_z:
  f_loop_min_y = min(f_min_y, (f_max_y - g_max_y) + g_min_y)
  f_loop_max_y = f_max_y
  for y = f_loop_min_y to f_loop_max_y:
    for x = f_min_x to f_max_x:
      if (f_loop_min_z <= z <= f_loop_max_z):
        if (f_loop_min_y <= y <= f_loop_max_y):
          f(x, y, z) = x + y + z
    for x = g_min_x to g_max_x:
      g_shift_z = g_min_z - f_loop_min_z
      g_shift_y = g_max_y - f_loop_max_y
      if (g_min_z <= (z + g_shift_z) <= g_max_z):
        if (g_min_y <= (y + g_shift_y) <= g_max_y):
          g(x, y + g_shift_y, z + g_shift_z) = x - (y + g_shift_y) - (z + g_shift_z)

LoopAlignStrategy::AlignStart on dimension z will shift the loop iteration of 'g' at dimension z so that its starting value matches that of 'f'. Likewise, LoopAlignStrategy::AlignEnd on dimension y will shift the loop iteration of 'g' at dimension y so that its end value matches that of 'f'.

◆ compute_with() [2/4]

Stage & Halide::Stage::compute_with	(	LoopLevel	loop_level,
		LoopAlignStrategy	align = LoopAlignStrategy::Auto )

◆ compute_with() [3/4]

Stage & Halide::Stage::compute_with	(	const Stage &	s,
		const VarOrRVar &	var,
		const std::vector< std::pair< VarOrRVar, LoopAlignStrategy > > &	align )

◆ compute_with() [4/4]

Stage & Halide::Stage::compute_with	(	const Stage &	s,
		const VarOrRVar &	var,
		LoopAlignStrategy	align = LoopAlignStrategy::Auto )

◆ split()

Stage & Halide::Stage::split	(	const VarOrRVar &	old,
		const VarOrRVar &	outer,
		const VarOrRVar &	inner,
		const Expr &	factor,
		TailStrategy	tail = TailStrategy::Auto )

Scheduling calls that control how the domain of this stage is traversed.

See the documentation for Func for the meanings.

◆ fuse()

Stage & Halide::Stage::fuse	(	const VarOrRVar &	inner,
		const VarOrRVar &	outer,
		const VarOrRVar &	fused )

◆ serial()

Stage & Halide::Stage::serial ( const VarOrRVar & var )

◆ parallel() [1/2]

Stage & Halide::Stage::parallel ( const VarOrRVar & var )

Referenced by do_cost_model_schedule().

◆ vectorize() [1/2]

Stage & Halide::Stage::vectorize ( const VarOrRVar & var )

Examples: tutorial/lesson_09_update_definitions.cpp.

Referenced by do_cost_model_schedule().

◆ unroll() [1/2]

Stage & Halide::Stage::unroll ( const VarOrRVar & var )

Referenced by do_cost_model_schedule().

◆ parallel() [2/2]

Stage & Halide::Stage::parallel	(	const VarOrRVar &	var,
		const Expr &	task_size,
		TailStrategy	tail = TailStrategy::Auto )

◆ vectorize() [2/2]

Stage & Halide::Stage::vectorize	(	const VarOrRVar &	var,
		const Expr &	factor,
		TailStrategy	tail = TailStrategy::Auto )

◆ unroll() [2/2]

Stage & Halide::Stage::unroll	(	const VarOrRVar &	var,
		const Expr &	factor,
		TailStrategy	tail = TailStrategy::Auto )

◆ partition()

Stage & Halide::Stage::partition	(	const VarOrRVar &	var,
		Partition	partition_policy )

◆ never_partition_all()

Stage & Halide::Stage::never_partition_all ( )

◆ never_partition() [1/2]

Stage & Halide::Stage::never_partition ( const std::vector< VarOrRVar > & vars )

Referenced by never_partition().

◆ always_partition_all()

Stage & Halide::Stage::always_partition_all ( )

◆ always_partition() [1/2]

Stage & Halide::Stage::always_partition ( const std::vector< VarOrRVar > & vars )

Referenced by always_partition().

◆ tile() [1/5]

Stage & Halide::Stage::tile	(	const VarOrRVar &	x,
		const VarOrRVar &	y,
		const VarOrRVar &	xo,
		const VarOrRVar &	yo,
		const VarOrRVar &	xi,
		const VarOrRVar &	yi,
		const Expr &	xfactor,
		const Expr &	yfactor,
		TailStrategy	tail = TailStrategy::Auto )

◆ tile() [2/5]

Stage & Halide::Stage::tile	(	const VarOrRVar &	x,
		const VarOrRVar &	y,
		const VarOrRVar &	xi,
		const VarOrRVar &	yi,
		const Expr &	xfactor,
		const Expr &	yfactor,
		TailStrategy	tail = TailStrategy::Auto )

◆ tile() [3/5]

Stage & Halide::Stage::tile	(	const std::vector< VarOrRVar > &	previous,
		const std::vector< VarOrRVar > &	outers,
		const std::vector< VarOrRVar > &	inners,
		const std::vector< Expr > &	factors,
		const std::vector< TailStrategy > &	tails )

◆ tile() [4/5]

Stage & Halide::Stage::tile	(	const std::vector< VarOrRVar > &	previous,
		const std::vector< VarOrRVar > &	outers,
		const std::vector< VarOrRVar > &	inners,
		const std::vector< Expr > &	factors,
		TailStrategy	tail = TailStrategy::Auto )

◆ tile() [5/5]

Stage & Halide::Stage::tile	(	const std::vector< VarOrRVar > &	previous,
		const std::vector< VarOrRVar > &	inners,
		const std::vector< Expr > &	factors,
		TailStrategy	tail = TailStrategy::Auto )

◆ reorder() [1/2]

Stage & Halide::Stage::reorder ( const std::vector< VarOrRVar > & vars )

Referenced by do_cost_model_schedule(), and reorder().

◆ reorder() [2/2]

template<typename... Args>

HALIDE_NO_USER_CODE_INLINE std::enable_if< Internal::all_are_convertible< VarOrRVar, Args... >::value, Stage & >::type Halide::Stage::reorder	(	const VarOrRVar &	x,
		const VarOrRVar &	y,
		Args &&...	args )

inline

Definition at line 383 of file Func.h.

References reorder().

◆ never_partition() [2/2]

template<typename... Args>

HALIDE_NO_USER_CODE_INLINE std::enable_if< Internal::all_are_convertible< VarOrRVar, Args... >::value, Stage & >::type Halide::Stage::never_partition	(	const VarOrRVar &	x,
		Args &&...	args )

inline

Definition at line 390 of file Func.h.

References never_partition().

◆ always_partition() [2/2]

template<typename... Args>

HALIDE_NO_USER_CODE_INLINE std::enable_if< Internal::all_are_convertible< VarOrRVar, Args... >::value, Stage & >::type Halide::Stage::always_partition	(	const VarOrRVar &	x,
		Args &&...	args )

inline

Definition at line 397 of file Func.h.

References always_partition().

◆ rename()

Stage & Halide::Stage::rename	(	const VarOrRVar &	old_name,
		const VarOrRVar &	new_name )

◆ specialize()

Stage Halide::Stage::specialize ( const Expr & condition )

◆ specialize_fail()

void Halide::Stage::specialize_fail ( const std::string & message )

◆ gpu_threads() [1/3]

Stage & Halide::Stage::gpu_threads	(	const VarOrRVar &	thread_x,
		DeviceAPI	device_api = DeviceAPI::Default_GPU )

◆ gpu_threads() [2/3]

Stage & Halide::Stage::gpu_threads	(	const VarOrRVar &	thread_x,
		const VarOrRVar &	thread_y,
		DeviceAPI	device_api = DeviceAPI::Default_GPU )

◆ gpu_threads() [3/3]

Stage & Halide::Stage::gpu_threads	(	const VarOrRVar &	thread_x,
		const VarOrRVar &	thread_y,
		const VarOrRVar &	thread_z,
		DeviceAPI	device_api = DeviceAPI::Default_GPU )

◆ gpu_lanes()

Stage & Halide::Stage::gpu_lanes	(	const VarOrRVar &	thread_x,
		DeviceAPI	device_api = DeviceAPI::Default_GPU )

◆ gpu_single_thread()

Stage & Halide::Stage::gpu_single_thread ( DeviceAPI device_api = DeviceAPI::Default_GPU )

◆ gpu_blocks() [1/3]

Stage & Halide::Stage::gpu_blocks	(	const VarOrRVar &	block_x,
		DeviceAPI	device_api = DeviceAPI::Default_GPU )

◆ gpu_blocks() [2/3]

Stage & Halide::Stage::gpu_blocks	(	const VarOrRVar &	block_x,
		const VarOrRVar &	block_y,
		DeviceAPI	device_api = DeviceAPI::Default_GPU )

◆ gpu_blocks() [3/3]

Stage & Halide::Stage::gpu_blocks	(	const VarOrRVar &	block_x,
		const VarOrRVar &	block_y,
		const VarOrRVar &	block_z,
		DeviceAPI	device_api = DeviceAPI::Default_GPU )

◆ gpu() [1/3]

Stage & Halide::Stage::gpu	(	const VarOrRVar &	block_x,
		const VarOrRVar &	thread_x,
		DeviceAPI	device_api = DeviceAPI::Default_GPU )

◆ gpu() [2/3]

Stage & Halide::Stage::gpu	(	const VarOrRVar &	block_x,
		const VarOrRVar &	block_y,
		const VarOrRVar &	thread_x,
		const VarOrRVar &	thread_y,
		DeviceAPI	device_api = DeviceAPI::Default_GPU )

◆ gpu() [3/3]

Stage & Halide::Stage::gpu	(	const VarOrRVar &	block_x,
		const VarOrRVar &	block_y,
		const VarOrRVar &	block_z,
		const VarOrRVar &	thread_x,
		const VarOrRVar &	thread_y,
		const VarOrRVar &	thread_z,
		DeviceAPI	device_api = DeviceAPI::Default_GPU )

◆ gpu_tile() [1/6]

Stage & Halide::Stage::gpu_tile	(	const VarOrRVar &	x,
		const VarOrRVar &	bx,
		const VarOrRVar &	tx,
		const Expr &	x_size,
		TailStrategy	tail = TailStrategy::Auto,
		DeviceAPI	device_api = DeviceAPI::Default_GPU )

◆ gpu_tile() [2/6]

Stage & Halide::Stage::gpu_tile	(	const VarOrRVar &	x,
		const VarOrRVar &	tx,
		const Expr &	x_size,
		TailStrategy	tail = TailStrategy::Auto,
		DeviceAPI	device_api = DeviceAPI::Default_GPU )

◆ gpu_tile() [3/6]

Stage & Halide::Stage::gpu_tile	(	const VarOrRVar &	x,
		const VarOrRVar &	y,
		const VarOrRVar &	bx,
		const VarOrRVar &	by,
		const VarOrRVar &	tx,
		const VarOrRVar &	ty,
		const Expr &	x_size,
		const Expr &	y_size,
		TailStrategy	tail = TailStrategy::Auto,
		DeviceAPI	device_api = DeviceAPI::Default_GPU )

◆ gpu_tile() [4/6]

Stage & Halide::Stage::gpu_tile	(	const VarOrRVar &	x,
		const VarOrRVar &	y,
		const VarOrRVar &	tx,
		const VarOrRVar &	ty,
		const Expr &	x_size,
		const Expr &	y_size,
		TailStrategy	tail = TailStrategy::Auto,
		DeviceAPI	device_api = DeviceAPI::Default_GPU )

◆ gpu_tile() [5/6]

Stage & Halide::Stage::gpu_tile	(	const VarOrRVar &	x,
		const VarOrRVar &	y,
		const VarOrRVar &	z,
		const VarOrRVar &	bx,
		const VarOrRVar &	by,
		const VarOrRVar &	bz,
		const VarOrRVar &	tx,
		const VarOrRVar &	ty,
		const VarOrRVar &	tz,
		const Expr &	x_size,
		const Expr &	y_size,
		const Expr &	z_size,
		TailStrategy	tail = TailStrategy::Auto,
		DeviceAPI	device_api = DeviceAPI::Default_GPU )

◆ gpu_tile() [6/6]

Stage & Halide::Stage::gpu_tile	(	const VarOrRVar &	x,
		const VarOrRVar &	y,
		const VarOrRVar &	z,
		const VarOrRVar &	tx,
		const VarOrRVar &	ty,
		const VarOrRVar &	tz,
		const Expr &	x_size,
		const Expr &	y_size,
		const Expr &	z_size,
		TailStrategy	tail = TailStrategy::Auto,
		DeviceAPI	device_api = DeviceAPI::Default_GPU )

◆ allow_race_conditions()

Stage & Halide::Stage::allow_race_conditions ( )

◆ atomic()

Stage & Halide::Stage::atomic ( bool override_associativity_test = false )

◆ hexagon()

Stage & Halide::Stage::hexagon ( const VarOrRVar & x = Var::outermost() )

◆ prefetch() [1/3]

Stage & Halide::Stage::prefetch	(	const Func &	f,
		const VarOrRVar &	at,
		const VarOrRVar &	from,
		Expr	offset = 1,
		PrefetchBoundStrategy	strategy = PrefetchBoundStrategy::GuardWithIf )

Referenced by prefetch().

◆ prefetch() [2/3]

Stage & Halide::Stage::prefetch	(	const Parameter &	param,
		const VarOrRVar &	at,
		const VarOrRVar &	from,
		Expr	offset = 1,
		PrefetchBoundStrategy	strategy = PrefetchBoundStrategy::GuardWithIf )

◆ prefetch() [3/3]

template<typename T >

Stage & Halide::Stage::prefetch	(	const T &	image,
		const VarOrRVar &	at,
		const VarOrRVar &	from,
		Expr	offset = 1,
		PrefetchBoundStrategy	strategy = PrefetchBoundStrategy::GuardWithIf )

inline

Definition at line 468 of file Func.h.

References prefetch().

◆ unscheduled()

void Halide::Stage::unscheduled ( )

Assert that this stage has intentionally been given no schedule, and suppress the warning about unscheduled update definitions that would otherwise fire.

This counts as a schedule, so calling this twice on the same Stage will fail the assertion.

The documentation for this class was generated from the following file:

src/Func.h

Public Member Functions

Detailed Description

Constructor & Destructor Documentation

◆ Stage()

Member Function Documentation

◆ get_schedule()

◆ dump_argument_list()

◆ name()

◆ rfactor() [1/2]

◆ rfactor() [2/2]

◆ compute_with() [1/4]

◆ compute_with() [2/4]

◆ compute_with() [3/4]

◆ compute_with() [4/4]

◆ split()

◆ fuse()

◆ serial()

◆ parallel() [1/2]

◆ vectorize() [1/2]

◆ unroll() [1/2]

◆ parallel() [2/2]

◆ vectorize() [2/2]

◆ unroll() [2/2]

◆ partition()

◆ never_partition_all()

◆ never_partition() [1/2]

◆ always_partition_all()

◆ always_partition() [1/2]

◆ tile() [1/5]

◆ tile() [2/5]

◆ tile() [3/5]

◆ tile() [4/5]

◆ tile() [5/5]

◆ reorder() [1/2]

◆ reorder() [2/2]

◆ never_partition() [2/2]

◆ always_partition() [2/2]

◆ rename()

◆ specialize()

◆ specialize_fail()

◆ gpu_threads() [1/3]

◆ gpu_threads() [2/3]

◆ gpu_threads() [3/3]

◆ gpu_lanes()

◆ gpu_single_thread()

◆ gpu_blocks() [1/3]

◆ gpu_blocks() [2/3]

◆ gpu_blocks() [3/3]

◆ gpu() [1/3]

◆ gpu() [2/3]

◆ gpu() [3/3]

◆ gpu_tile() [1/6]

◆ gpu_tile() [2/6]

◆ gpu_tile() [3/6]

◆ gpu_tile() [4/6]

◆ gpu_tile() [5/6]

◆ gpu_tile() [6/6]

◆ allow_race_conditions()

◆ atomic()

◆ hexagon()

◆ prefetch() [1/3]

◆ prefetch() [2/3]

◆ prefetch() [3/3]

◆ unscheduled()