25class OutputImageParam;
47 const std::string &
name()
const {
77 std::vector<Var> dim_vars;
81 void split(
const std::string &old,
const std::string &outer,
const std::string &inner,
83 void remove(
const std::string &var);
86 const std::vector<Internal::StorageDim> &storage_dims()
const {
90 Stage &compute_with(
LoopLevel loop_level,
const std::map<std::string, LoopAlignStrategy> &align);
94 : function(std::move(f)), definition(std::move(d)), stage_index(stage_index) {
97 dim_vars.reserve(function.
args().size());
98 for (
const auto &arg : function.
args()) {
99 dim_vars.emplace_back(arg);
363 const Expr &xfactor,
const Expr &yfactor,
366 const std::vector<VarOrRVar> &outers,
367 const std::vector<VarOrRVar> &inners,
368 const std::vector<Expr> &factors,
369 const std::vector<TailStrategy> &tails);
371 const std::vector<VarOrRVar> &outers,
372 const std::vector<VarOrRVar> &inners,
373 const std::vector<Expr> &factors,
376 const std::vector<VarOrRVar> &inners,
377 const std::vector<Expr> &factors,
381 template<
typename... Args>
384 std::vector<VarOrRVar> collected_args{x, y, std::forward<Args>(args)...};
385 return reorder(collected_args);
388 template<
typename... Args>
391 std::vector<VarOrRVar> collected_args{x, std::forward<Args>(args)...};
395 template<
typename... Args>
398 std::vector<VarOrRVar> collected_args{x, std::forward<Args>(args)...};
436 const Expr &x_size,
const Expr &y_size,
442 const Expr &x_size,
const Expr &y_size,
449 const Expr &x_size,
const Expr &y_size,
const Expr &z_size,
454 const Expr &x_size,
const Expr &y_size,
const Expr &z_size,
470 return prefetch(image.parameter(), at, from, std::move(offset), strategy);
493 int implicit_placeholder_pos;
495 std::vector<Expr> args;
496 std::vector<Expr> args_with_implicit_vars(
const std::vector<Expr> &e)
const;
501 template<
typename BinaryOp>
502 Stage func_ref_update(
const Tuple &e,
int init_val);
507 template<
typename BinaryOp>
508 Stage func_ref_update(
Expr e,
int init_val);
512 int placeholder_pos = -1,
int count = 0);
514 int placeholder_pos = -1,
int count = 0);
612 std::vector<Expr> args;
617 Tuple values_with_undefs(
const Expr &e)
const;
711 std::pair<int, int> add_implicit_vars(std::vector<Var> &)
const;
712 std::pair<int, int> add_implicit_vars(std::vector<Expr> &)
const;
723 Func &reorder_storage(
const std::vector<Var> &dims,
size_t start);
725 void invalidate_cache();
735 explicit Func(
const Type &required_type,
int required_dims,
const std::string &
name);
742 explicit Func(
const std::vector<Type> &required_types,
int required_dims,
const std::string &
name);
758 template<
typename T,
int Dims>
841 std::vector<int32_t> sizes = {},
878 const std::vector<int32_t> &sizes,
889 void compile_to_bitcode(
const std::string &filename,
const std::vector<Argument> &,
const std::string &fn_name,
912 void compile_to_object(
const std::string &filename,
const std::vector<Argument> &,
const std::string &fn_name,
925 void compile_to_header(
const std::string &filename,
const std::vector<Argument> &,
const std::string &fn_name =
"",
934 void compile_to_assembly(
const std::string &filename,
const std::vector<Argument> &,
const std::string &fn_name,
945 const std::vector<Argument> &,
946 const std::string &fn_name =
"",
953 const std::vector<Argument> &
args,
961 const std::vector<Argument> &
args,
975 const std::string &fn_name =
"",
983 const std::string &fn_name =
"",
994 const std::vector<Argument> &
args,
995 const std::vector<Target> &targets);
1011 const std::vector<Argument> &
args,
1012 const std::vector<Target> &targets,
1013 const std::vector<std::string> &suffixes);
1024 void compile_to(
const std::map<OutputFileType, std::string> &output_files,
1025 const std::vector<Argument> &
args,
1026 const std::string &fn_name,
1058 template<
typename T>
1137 std::vector<RVar>
rvars(
int idx = 0)
const;
1155 const std::vector<ExternFuncArgument> ¶ms,
Type t,
1165 const std::vector<ExternFuncArgument> ¶ms,
1166 const std::vector<Type> &
types,
int dimensionality,
1173 const std::vector<ExternFuncArgument> ¶ms,
1174 const std::vector<Type> &
types,
int dimensionality,
1183 const std::vector<ExternFuncArgument> ¶ms,
Type t,
1184 const std::vector<Var> &arguments,
1187 define_extern(function_name, params, std::vector<Type>{t}, arguments,
1188 mangling, device_api);
1192 const std::vector<ExternFuncArgument> ¶ms,
1193 const std::vector<Type> &
types,
1194 const std::vector<Var> &arguments,
1237 template<
typename... Args>
1240 std::vector<Var> collected_args{std::forward<Args>(
args)...};
1254 template<
typename... Args>
1257 std::vector<Expr> collected_args{x, std::forward<Args>(
args)...};
1258 return (*
this)(collected_args);
1479 template<
typename... Args>
1482 std::vector<VarOrRVar> collected_args{x, std::forward<Args>(
args)...};
1496 template<
typename... Args>
1499 std::vector<VarOrRVar> collected_args{x, std::forward<Args>(
args)...};
1566 const Expr &xfactor,
const Expr &yfactor,
1573 const Expr &xfactor,
const Expr &yfactor,
1578 const std::vector<VarOrRVar> &outers,
1579 const std::vector<VarOrRVar> &inners,
1580 const std::vector<Expr> &factors,
1581 const std::vector<TailStrategy> &tails);
1585 const std::vector<VarOrRVar> &outers,
1586 const std::vector<VarOrRVar> &inners,
1587 const std::vector<Expr> &factors,
1592 const std::vector<VarOrRVar> &inners,
1593 const std::vector<Expr> &factors,
1600 template<
typename... Args>
1603 std::vector<VarOrRVar> collected_args{x, y, std::forward<Args>(
args)...};
1604 return reorder(collected_args);
1939 const Expr &x_size,
const Expr &y_size,
1945 const Expr &x_size,
const Expr &y_size,
1952 const Expr &x_size,
const Expr &y_size,
const Expr &z_size,
1957 const Expr &x_size,
const Expr &y_size,
const Expr &z_size,
2051 template<
typename T>
2054 return prefetch(image.parameter(), at, from, std::move(offset), strategy);
2076 template<
typename... Args>
2079 std::vector<Var> collected_args{x, y, std::forward<Args>(
args)...};
2080 return reorder_storage(collected_args);
2615template<
typename Last>
2617 using T =
typename std::remove_pointer<typename std::remove_reference<Last>::type>::type;
2619 <<
"Can't evaluate expression "
2620 << t[idx] <<
" of type " << t[idx].type()
2621 <<
" as a scalar of type " <<
type_of<T>() <<
"\n";
2624template<
typename First,
typename Second,
typename... Rest>
2630template<
typename Last>
2632 using T =
typename std::remove_pointer<typename std::remove_reference<Last>::type>::type;
2636template<
typename First,
typename Second,
typename... Rest>
2650 <<
"Can't evaluate expression "
2651 << e <<
" of type " << e.
type()
2652 <<
" as a scalar of type " <<
type_of<T>() <<
"\n";
2666template<
typename First,
typename... Rest>
2677template<
typename First,
typename... Rest>
2679 evaluate<First, Rest...>(
nullptr, std::move(t), std::forward<First>(first), std::forward<Rest...>(rest...));
2704 <<
"Can't evaluate expression "
2705 << e <<
" of type " << e.
type()
2706 <<
" as a scalar of type " <<
type_of<T>() <<
"\n";
2717template<
typename First,
typename... Rest>
Defines a type used for expressing the type signature of a generated halide pipeline.
#define internal_assert(c)
Base classes for Halide expressions (Halide::Expr) and statements (Halide::Internal::Stmt)
Defines the struct representing lifetime and dependencies of a JIT compiled halide pipeline.
Defines Module, an IR container that fully describes a Halide program.
Classes for declaring scalar parameters to halide pipelines.
Defines the front-end class representing an entire Halide imaging pipeline.
Defines the front-end syntax for reduction domains and reduction variables.
Defines the structure that describes a Halide target.
Defines Tuple - the front-end handle on small arrays of expressions.
#define HALIDE_NO_USER_CODE_INLINE
Defines the Var - the front-end variable.
A Halide::Buffer is a named shared reference to a Halide::Runtime::Buffer.
Helper class for identifying purpose of an Expr passed to memoize.
EvictionKey(const Expr &expr=Expr())
Func & gpu(const VarOrRVar &block_x, const VarOrRVar &thread_x, DeviceAPI device_api=DeviceAPI::Default_GPU)
Tell Halide that the following dimensions correspond to GPU block indices and thread indices.
Func & gpu_blocks(const VarOrRVar &block_x, DeviceAPI device_api=DeviceAPI::Default_GPU)
Tell Halide that the following dimensions correspond to GPU block indices.
Func & bound_extent(const Var &var, Expr extent)
Bound the extent of a Func's realization, but not its min.
void print_loop_nest()
Write out the loop nests specified by the schedule for this Function.
Func & unroll(const VarOrRVar &var, const Expr &factor, TailStrategy tail=TailStrategy::Auto)
Split a dimension by the given factor, then unroll the inner dimension.
bool is_extern() const
Is this function an external stage? That is, was it defined using define_extern?
FuncRef operator()(std::vector< Expr >) const
Either calls to the function, or the left-hand-side of an update definition (see RDom).
Func & gpu_tile(const VarOrRVar &x, const VarOrRVar &y, const VarOrRVar &tx, const VarOrRVar &ty, const Expr &x_size, const Expr &y_size, TailStrategy tail=TailStrategy::Auto, DeviceAPI device_api=DeviceAPI::Default_GPU)
Func(const std::string &name)
Declare a new undefined function with the given name.
void compile_to_multitarget_object_files(const std::string &filename_prefix, const std::vector< Argument > &args, const std::vector< Target > &targets, const std::vector< std::string > &suffixes)
Like compile_to_multitarget_static_library(), except that the object files are all output as object f...
Func & memoize(const EvictionKey &eviction_key=EvictionKey())
Use the halide_memoization_cache_... interface to store a computed version of this function across in...
Func & partition(const VarOrRVar &var, Partition partition_policy)
Set the loop partition policy.
Func & trace_stores()
Trace all stores to the buffer backing this Func by emitting calls to halide_trace.
Func & trace_loads()
Trace all loads from this Func by emitting calls to halide_trace.
Func & never_partition_all()
Set the loop partition policy to Never for all Vars and RVar of the initial definition of the Func.
Func & prefetch(const Parameter ¶m, const VarOrRVar &at, const VarOrRVar &from, Expr offset=1, PrefetchBoundStrategy strategy=PrefetchBoundStrategy::GuardWithIf)
void specialize_fail(const std::string &message)
Add a specialization to a Func that always terminates execution with a call to halide_error().
Func & vectorize(const VarOrRVar &var, const Expr &factor, TailStrategy tail=TailStrategy::Auto)
Split a dimension by the given factor, then vectorize the inner dimension.
Func & compute_at(const Func &f, const RVar &var)
Schedule a function to be computed within the iteration over some dimension of an update domain.
Func & tile(const std::vector< VarOrRVar > &previous, const std::vector< VarOrRVar > &outers, const std::vector< VarOrRVar > &inners, const std::vector< Expr > &factors, TailStrategy tail=TailStrategy::Auto)
The generalized tile, with a single tail strategy to apply to all vars.
void compile_to_assembly(const std::string &filename, const std::vector< Argument > &, const std::string &fn_name, const Target &target=get_target_from_environment())
Statically compile this function to text assembly equivalent to the object file generated by compile_...
Internal::Function function() const
Get a handle on the internal halide function that this Func represents.
bool has_update_definition() const
Does this function have at least one update definition?
void compile_jit(const Target &target=get_jit_target_from_environment())
Eagerly jit compile the function to machine code.
Func & compute_with(LoopLevel loop_level, const std::vector< std::pair< VarOrRVar, LoopAlignStrategy > > &align)
Func()
Declare a new undefined function with an automatically-generated unique name.
Func & store_in(MemoryType memory_type)
Set the type of memory this Func should be stored in.
const Type & type() const
Get the type(s) of the outputs of this Func.
void compile_to_bitcode(const std::string &filename, const std::vector< Argument > &, const Target &target=get_target_from_environment())
void infer_input_bounds(Pipeline::RealizationArg outputs, const Target &target=get_jit_target_from_environment())
Func & async()
Produce this Func asynchronously in a separate thread.
Func & reorder(const std::vector< VarOrRVar > &vars)
Reorder variables to have the given nesting order, from innermost out.
HALIDE_NO_USER_CODE_INLINE std::enable_if< Internal::all_are_convertible< Expr, Args... >::value, FuncRef >::type operator()(const Expr &x, Args &&...args) const
Func & gpu_blocks(const VarOrRVar &block_x, const VarOrRVar &block_y, const VarOrRVar &block_z, DeviceAPI device_api=DeviceAPI::Default_GPU)
Func & set_estimate(const Var &var, const Expr &min, const Expr &extent)
Statically declare the range over which the function will be evaluated in the general case.
Func & gpu_lanes(const VarOrRVar &thread_x, DeviceAPI device_api=DeviceAPI::Default_GPU)
The given dimension corresponds to the lanes in a GPU warp.
void compile_to_lowered_stmt(const std::string &filename, const std::vector< Argument > &args, StmtOutputFormat fmt=Text, const Target &target=get_target_from_environment())
Write out an internal representation of lowered code.
void compile_to_c(const std::string &filename, const std::vector< Argument > &, const std::string &fn_name="", const Target &target=get_target_from_environment())
Statically compile this function to C source code.
Stage update(int idx=0)
Get a handle on an update step for the purposes of scheduling it.
Func & gpu_blocks(const VarOrRVar &block_x, const VarOrRVar &block_y, DeviceAPI device_api=DeviceAPI::Default_GPU)
Func & gpu_threads(const VarOrRVar &thread_x, const VarOrRVar &thread_y, DeviceAPI device_api=DeviceAPI::Default_GPU)
Func(const Type &required_type, int required_dims, const std::string &name)
Declare a new undefined function with the given name.
bool defined() const
Does this function have at least a pure definition.
Func(const std::vector< Type > &required_types, int required_dims, const std::string &name)
Declare a new undefined function with the given name.
Func & align_storage(const Var &dim, const Expr &alignment)
Pad the storage extent of a particular dimension of realizations of this function up to be a multiple...
Func copy_to_host()
Declare that this function should be implemented by a call to halide_buffer_copy with a NULL target d...
void infer_input_bounds(JITUserContext *context, Pipeline::RealizationArg outputs, const Target &target=get_jit_target_from_environment())
Func & gpu_tile(const VarOrRVar &x, const VarOrRVar &tx, const Expr &x_size, TailStrategy tail=TailStrategy::Auto, DeviceAPI device_api=DeviceAPI::Default_GPU)
Func & serial(const VarOrRVar &var)
Mark a dimension to be traversed serially.
void compile_to_header(const std::string &filename, const std::vector< Argument > &, const std::string &fn_name="", const Target &target=get_target_from_environment())
Emit a header file with the given filename for this function.
Func & align_bounds(const Var &var, Expr modulus, Expr remainder=0)
Expand the region computed so that the min coordinates is congruent to 'remainder' modulo 'modulus',...
Func & reorder_storage(const Var &x, const Var &y)
Func & split(const VarOrRVar &old, const VarOrRVar &outer, const VarOrRVar &inner, const Expr &factor, TailStrategy tail=TailStrategy::Auto)
Split a dimension into inner and outer subdimensions with the given names, where the inner dimension ...
Func(const Expr &e)
Declare a new function with an automatically-generated unique name, and define it to return the given...
Func & tile(const VarOrRVar &x, const VarOrRVar &y, const VarOrRVar &xo, const VarOrRVar &yo, const VarOrRVar &xi, const VarOrRVar &yi, const Expr &xfactor, const Expr &yfactor, TailStrategy tail=TailStrategy::Auto)
Split two dimensions at once by the given factors, and then reorder the resulting dimensions to be xi...
int dimensions() const
The dimensionality (number of arguments) of this function.
Func & gpu(const VarOrRVar &block_x, const VarOrRVar &block_y, const VarOrRVar &thread_x, const VarOrRVar &thread_y, DeviceAPI device_api=DeviceAPI::Default_GPU)
void compile_to_conceptual_stmt(const std::string &filename, const std::vector< Argument > &args, StmtOutputFormat fmt=Text, const Target &target=get_target_from_environment())
Write out a conceptual representation of lowered code, before any parallel loop get factored out into...
const std::vector< Type > & types() const
Func & tile(const std::vector< VarOrRVar > &previous, const std::vector< VarOrRVar > &outers, const std::vector< VarOrRVar > &inners, const std::vector< Expr > &factors, const std::vector< TailStrategy > &tails)
A more general form of tile, which defines tiles of any dimensionality.
Func & gpu_tile(const VarOrRVar &x, const VarOrRVar &y, const VarOrRVar &z, const VarOrRVar &bx, const VarOrRVar &by, const VarOrRVar &bz, const VarOrRVar &tx, const VarOrRVar &ty, const VarOrRVar &tz, const Expr &x_size, const Expr &y_size, const Expr &z_size, TailStrategy tail=TailStrategy::Auto, DeviceAPI device_api=DeviceAPI::Default_GPU)
const std::vector< Expr > & update_args(int idx=0) const
Get the left-hand-side of the update definition.
Realization realize(JITUserContext *context, std::vector< int32_t > sizes={}, const Target &target=Target())
Same as above, but takes a custom user-provided context to be passed to runtime functions.
int outputs() const
Get the number of outputs of this Func.
HALIDE_NO_USER_CODE_INLINE std::enable_if< Internal::all_are_convertible< Var, Args... >::value, Func & >::type reorder_storage(const Var &x, const Var &y, Args &&...args)
Func & compute_root()
Compute all of this function once ahead of time.
Func & compute_with(LoopLevel loop_level, LoopAlignStrategy align=LoopAlignStrategy::Auto)
Func & trace_realizations()
Trace all realizations of this Func by emitting calls to halide_trace.
JITHandlers & jit_handlers()
Get a struct containing the currently set custom functions used by JIT.
std::vector< Var > args() const
Get the pure arguments.
Tuple update_values(int idx=0) const
Get the right-hand-side of an update definition for functions that returns multiple values.
Func & allow_race_conditions()
Specify that race conditions are permitted for this Func, which enables parallelizing over RVars even...
void compile_to_bitcode(const std::string &filename, const std::vector< Argument > &, const std::string &fn_name, const Target &target=get_target_from_environment())
Statically compile this function to llvm bitcode, with the given filename (which should probably end ...
HALIDE_NO_USER_CODE_INLINE std::enable_if< Internal::all_are_convertible< VarOrRVar, Args... >::value, Func & >::type reorder(const VarOrRVar &x, const VarOrRVar &y, Args &&...args)
int num_update_definitions() const
How many update definitions does this function have?
Func & tile(const VarOrRVar &x, const VarOrRVar &y, const VarOrRVar &xi, const VarOrRVar &yi, const Expr &xfactor, const Expr &yfactor, TailStrategy tail=TailStrategy::Auto)
A shorter form of tile, which reuses the old variable names as the new outer dimensions.
Func & never_partition(const std::vector< VarOrRVar > &vars)
Set the loop partition policy to Never for a vector of Vars and RVars.
Stage specialize(const Expr &condition)
Specialize a Func.
Callable compile_to_callable(const std::vector< Argument > &args, const Target &target=get_jit_target_from_environment())
Eagerly jit compile the function to machine code and return a callable struct that behaves like a fun...
Func & ring_buffer(Expr extent)
Expands the storage of the function by an extra dimension to enable ring buffering.
Func & compute_with(const Stage &s, const VarOrRVar &var, LoopAlignStrategy align=LoopAlignStrategy::Auto)
Func & store_at(LoopLevel loop_level)
Equivalent to the version of store_at that takes a Var, but schedules storage at a given LoopLevel.
Func & gpu_tile(const VarOrRVar &x, const VarOrRVar &y, const VarOrRVar &z, const VarOrRVar &tx, const VarOrRVar &ty, const VarOrRVar &tz, const Expr &x_size, const Expr &y_size, const Expr &z_size, TailStrategy tail=TailStrategy::Auto, DeviceAPI device_api=DeviceAPI::Default_GPU)
HALIDE_NO_USER_CODE_INLINE Func(Buffer< T, Dims > &im)
Construct a new Func to wrap a Buffer.
void define_extern(const std::string &function_name, const std::vector< ExternFuncArgument > ¶ms, const std::vector< Type > &types, const std::vector< Var > &arguments, NameMangling mangling=NameMangling::Default, DeviceAPI device_api=DeviceAPI::Host)
Func & compute_with(const Stage &s, const VarOrRVar &var, const std::vector< std::pair< VarOrRVar, LoopAlignStrategy > > &align)
Schedule the iteration over the initial definition of this function to be fused with another stage 's...
Expr value() const
The right-hand-side value of the pure definition of this function.
Func & align_extent(const Var &var, Expr modulus)
Expand the region computed so that the extent is a multiple of 'modulus'.
void infer_input_bounds(const std::vector< int32_t > &sizes, const Target &target=get_jit_target_from_environment())
For a given size of output, or a given output buffer, determine the bounds required of all unbound Im...
Func clone_in(const std::vector< Func > &fs)
Module compile_to_module(const std::vector< Argument > &args, const std::string &fn_name="", const Target &target=get_target_from_environment())
Store an internal representation of lowered code as a self contained Module suitable for further comp...
Func & atomic(bool override_associativity_test=false)
Issue atomic updates for this Func.
void define_extern(const std::string &function_name, const std::vector< ExternFuncArgument > ¶ms, const std::vector< Type > &types, int dimensionality, NameMangling mangling=NameMangling::Default, DeviceAPI device_api=DeviceAPI::Host)
void realize(Pipeline::RealizationArg outputs, const Target &target=Target())
Evaluate this function into an existing allocated buffer or buffers.
Func & unroll(const VarOrRVar &var)
Mark a dimension to be completely unrolled.
Func & set_estimates(const Region &estimates)
Set (min, extent) estimates for all dimensions in the Func at once; this is equivalent to calling set...
Func in()
Create and return a global identity wrapper, which wraps all calls to this Func by any other Func.
OutputImageParam output_buffer() const
Get a handle on the output buffer for this Func.
Expr update_value(int idx=0) const
Get the right-hand-side of an update definition.
void compile_to(const std::map< OutputFileType, std::string > &output_files, const std::vector< Argument > &args, const std::string &fn_name, const Target &target=get_target_from_environment())
Compile and generate multiple target files with single call.
std::vector< Argument > infer_arguments() const
Infer the arguments to the Func, sorted into a canonical order: all buffers (sorted alphabetically by...
void compile_to_llvm_assembly(const std::string &filename, const std::vector< Argument > &, const Target &target=get_target_from_environment())
Func & store_at(const Func &f, const Var &var)
Allocate storage for this function within f's loop over var.
void add_custom_lowering_pass(T *pass)
Add a custom pass to be used during lowering.
Func in(const std::vector< Func > &fs)
Create and return an identity wrapper shared by all the Funcs in 'fs'.
Func & fold_storage(const Var &dim, const Expr &extent, bool fold_forward=true)
Store realizations of this function in a circular buffer of a given extent.
Func & hoist_storage_root()
Equivalent to Func::hoist_storage_root, but schedules storage outside the outermost loop.
Realization realize(std::vector< int32_t > sizes={}, const Target &target=Target())
Evaluate this function over some rectangular domain and return the resulting buffer or buffers.
void realize(JITUserContext *context, Pipeline::RealizationArg outputs, const Target &target=Target())
Same as above, but takes a custom user-provided context to be passed to runtime functions.
Func & compute_at(LoopLevel loop_level)
Schedule a function to be computed within the iteration over a given LoopLevel.
Func & gpu_threads(const VarOrRVar &thread_x, DeviceAPI device_api=DeviceAPI::Default_GPU)
Tell Halide that the following dimensions correspond to GPU thread indices.
Func & always_partition(const std::vector< VarOrRVar > &vars)
Set the loop partition policy to Always for a vector of Vars and RVars.
void define_extern(const std::string &function_name, const std::vector< ExternFuncArgument > ¶ms, Type t, int dimensionality, NameMangling mangling=NameMangling::Default, DeviceAPI device_api=DeviceAPI::Host)
Add an extern definition for this Func.
void compile_to_file(const std::string &filename_prefix, const std::vector< Argument > &args, const std::string &fn_name="", const Target &target=get_target_from_environment())
Compile to object file and header pair, with the given arguments.
void add_custom_lowering_pass(Internal::IRMutator *pass, std::function< void()> deleter)
Add a custom pass to be used during lowering, with the function that will be called to delete it also...
Func & add_trace_tag(const std::string &trace_tag)
Add a string of arbitrary text that will be passed thru to trace inspection code if the Func is reali...
Func & store_at(const Func &f, const RVar &var)
Equivalent to the version of store_at that takes a Var, but schedules storage within the loop over a ...
void clear_custom_lowering_passes()
Remove all previously-set custom lowering passes.
void compile_to_llvm_assembly(const std::string &filename, const std::vector< Argument > &, const std::string &fn_name, const Target &target=get_target_from_environment())
Statically compile this function to llvm assembly, with the given filename (which should probably end...
const std::string & name() const
The name of this function, either given during construction, or automatically generated.
void compile_to_multitarget_static_library(const std::string &filename_prefix, const std::vector< Argument > &args, const std::vector< Target > &targets)
Compile to static-library file and header pair once for each target; each resulting function will be ...
Func & hexagon(const VarOrRVar &x=Var::outermost())
Schedule for execution on Hexagon.
Func & tile(const std::vector< VarOrRVar > &previous, const std::vector< VarOrRVar > &inners, const std::vector< Expr > &factors, TailStrategy tail=TailStrategy::Auto)
Generalized tiling, reusing the previous names as the outer names.
Func & store_root()
Equivalent to Func::store_at, but schedules storage outside the outermost loop.
Func & gpu_tile(const VarOrRVar &x, const VarOrRVar &y, const VarOrRVar &bx, const VarOrRVar &by, const VarOrRVar &tx, const VarOrRVar &ty, const Expr &x_size, const Expr &y_size, TailStrategy tail=TailStrategy::Auto, DeviceAPI device_api=DeviceAPI::Default_GPU)
Func & prefetch(const Func &f, const VarOrRVar &at, const VarOrRVar &from, Expr offset=1, PrefetchBoundStrategy strategy=PrefetchBoundStrategy::GuardWithIf)
Prefetch data written to or read from a Func or an ImageParam by a subsequent loop iteration,...
void compile_to_assembly(const std::string &filename, const std::vector< Argument > &, const Target &target=get_target_from_environment())
std::vector< RVar > rvars(int idx=0) const
Get the RVars of the reduction domain for an update definition, if there is one.
Func clone_in(const Func &f)
Similar to Func::in; however, instead of replacing the call to this Func with an identity Func that r...
const std::vector< CustomLoweringPass > & custom_lowering_passes()
Get the custom lowering passes.
HALIDE_NO_USER_CODE_INLINE std::enable_if< Internal::all_are_convertible< Var, Args... >::value, FuncRef >::type operator()(Args &&...args) const
Func & hoist_storage(const Func &f, const Var &var)
Hoist storage for this function within f's loop over var.
Func & compute_inline()
Aggressively inline all uses of this function.
Func(Internal::Function f)
Construct a new Func to wrap an existing, already-define Function object.
void compile_to_object(const std::string &filename, const std::vector< Argument > &, const std::string &fn_name, const Target &target=get_target_from_environment())
Statically compile this function to an object file, with the given filename (which should probably en...
HALIDE_NO_USER_CODE_INLINE std::enable_if< Internal::all_are_convertible< VarOrRVar, Args... >::value, Func & >::type never_partition(const VarOrRVar &x, Args &&...args)
Set the loop partition policy to Never for some number of Vars and RVars.
Func & bound_storage(const Var &dim, const Expr &bound)
Bound the extent of a Func's storage, but not extent of its compute.
Func & rename(const VarOrRVar &old_name, const VarOrRVar &new_name)
Rename a dimension.
Tuple values() const
The values returned by this function.
const std::string & extern_function_name() const
Get the name of the extern function called for an extern definition.
Func copy_to_device(DeviceAPI d=DeviceAPI::Default_GPU)
Declare that this function should be implemented by a call to halide_buffer_copy with the given targe...
Func & parallel(const VarOrRVar &var)
Mark a dimension to be traversed in parallel.
Func & gpu_threads(const VarOrRVar &thread_x, const VarOrRVar &thread_y, const VarOrRVar &thread_z, DeviceAPI device_api=DeviceAPI::Default_GPU)
void compile_to_object(const std::string &filename, const std::vector< Argument > &, const Target &target=get_target_from_environment())
Func & hoist_storage(LoopLevel loop_level)
Equivalent to the version of hoist_storage that takes a Var, but schedules storage at a given LoopLev...
Func & reorder_storage(const std::vector< Var > &dims)
Specify how the storage for the function is laid out.
Func & gpu(const VarOrRVar &block_x, const VarOrRVar &block_y, const VarOrRVar &block_z, const VarOrRVar &thread_x, const VarOrRVar &thread_y, const VarOrRVar &thread_z, DeviceAPI device_api=DeviceAPI::Default_GPU)
Func & no_profiling()
Marks this function as a function that should not be profiled when using the target feature Profile o...
void define_extern(const std::string &function_name, const std::vector< ExternFuncArgument > ¶ms, Type t, const std::vector< Var > &arguments, NameMangling mangling=NameMangling::Default, DeviceAPI device_api=DeviceAPI::Host)
void infer_input_bounds(JITUserContext *context, const std::vector< int32_t > &sizes, const Target &target=get_jit_target_from_environment())
Versions of infer_input_bounds that take a custom user context to pass to runtime functions.
Func & vectorize(const VarOrRVar &var)
Mark a dimension to be computed all-at-once as a single vector.
void debug_to_file(const std::string &filename)
When this function is compiled, include code that dumps its values to a file after it is realized,...
Func & parallel(const VarOrRVar &var, const Expr &task_size, TailStrategy tail=TailStrategy::Auto)
Split a dimension by the given task_size, and the parallelize the outer dimension.
Func & fuse(const VarOrRVar &inner, const VarOrRVar &outer, const VarOrRVar &fused)
Join two dimensions into a single fused dimension.
Func in(const Func &f)
Creates and returns a new identity Func that wraps this Func.
Func & bound(const Var &var, Expr min, Expr extent)
Statically declare that the range over which a function should be evaluated is given by the second an...
std::vector< OutputImageParam > output_buffers() const
HALIDE_NO_USER_CODE_INLINE std::enable_if< Internal::all_are_convertible< VarOrRVar, Args... >::value, Func & >::type always_partition(const VarOrRVar &x, Args &&...args)
Set the loop partition policy to Always for some number of Vars and RVars.
Func & prefetch(const T &image, const VarOrRVar &at, const VarOrRVar &from, Expr offset=1, PrefetchBoundStrategy strategy=PrefetchBoundStrategy::GuardWithIf)
void compile_to_static_library(const std::string &filename_prefix, const std::vector< Argument > &args, const std::string &fn_name="", const Target &target=get_target_from_environment())
Compile to static-library file and header pair, with the given arguments.
Func & compute_at(const Func &f, const Var &var)
Compute this function as needed for each unique value of the given var for the given calling function...
Func & hoist_storage(const Func &f, const RVar &var)
Equivalent to the version of hoist_storage that takes a Var, but schedules storage within the loop ov...
FuncRef operator()(std::vector< Var >) const
Construct either the left-hand-side of a definition, or a call to a functions that happens to only co...
Func & always_partition_all()
Set the loop partition policy to Always for all Vars and RVar of the initial definition of the Func.
const Internal::StageSchedule & get_schedule() const
Return the current StageSchedule associated with this initial Stage of this Func.
Func & gpu_single_thread(DeviceAPI device_api=DeviceAPI::Default_GPU)
Tell Halide to run this stage using a single gpu thread and block.
void define_extern(const std::string &function_name, const std::vector< ExternFuncArgument > ¶ms, const std::vector< Type > &types, int dimensionality, NameMangling mangling)
Func & gpu_tile(const VarOrRVar &x, const VarOrRVar &bx, const VarOrRVar &tx, const Expr &x_size, TailStrategy tail=TailStrategy::Auto, DeviceAPI device_api=DeviceAPI::Default_GPU)
Short-hand for tiling a domain and mapping the tile indices to GPU block indices and the coordinates ...
A fragment of front-end syntax of the form f(x, y, z), where x, y, z are Vars or Exprs.
Stage operator*=(const FuncRef &)
FuncTupleElementRef operator[](int) const
When a FuncRef refers to a function that provides multiple outputs, you can access each output as an ...
Stage operator-=(const FuncRef &)
size_t size() const
How many outputs does the function this refers to produce.
Internal::Function function() const
What function is this calling?
Stage operator+=(Expr)
Define a stage that adds the given expression to this Func.
Stage operator-=(Expr)
Define a stage that adds the negative of the given expression to this Func.
Stage operator*=(Expr)
Define a stage that multiplies this Func by the given expression.
Stage operator-=(const Tuple &)
Stage operator/=(Expr)
Define a stage that divides this Func by the given expression.
Stage operator+=(const FuncRef &)
Stage operator=(const Expr &)
Use this as the left-hand-side of a definition or an update definition (see RDom).
Stage operator=(const FuncRef &)
FuncRef(Internal::Function, const std::vector< Var > &, int placeholder_pos=-1, int count=0)
Stage operator+=(const Tuple &)
FuncRef(const Internal::Function &, const std::vector< Expr > &, int placeholder_pos=-1, int count=0)
Stage operator/=(const FuncRef &)
Stage operator*=(const Tuple &)
Stage operator/=(const Tuple &)
Stage operator=(const Tuple &)
Use this as the left-hand-side of a definition or an update definition for a Func with multiple outpu...
A fragment of front-end syntax of the form f(x, y, z)[index], where x, y, z are Vars or Exprs.
int index() const
Return index to the function outputs.
Stage operator+=(const Expr &e)
Define a stage that adds the given expression to Tuple component 'idx' of this Func.
Stage operator*=(const Expr &e)
Define a stage that multiplies Tuple component 'idx' of this Func by the given expression.
Stage operator/=(const Expr &e)
Define a stage that divides Tuple component 'idx' of this Func by the given expression.
Stage operator=(const Expr &e)
Use this as the left-hand-side of an update definition of Tuple component 'idx' of a Func (see RDom).
Stage operator=(const FuncRef &e)
Internal::Function function() const
What function is this calling?
Stage operator-=(const Expr &e)
Define a stage that adds the negative of the given expression to Tuple component 'idx' of this Func.
FuncTupleElementRef(const FuncRef &ref, const std::vector< Expr > &args, int idx)
An Image parameter to a halide pipeline.
A Function definition which can either represent a init or an update definition.
const std::vector< Expr > & args() const
Get the default (no-specialization) arguments (left-hand-side) of the definition.
const StageSchedule & schedule() const
Get the default (no-specialization) stage-specific schedule associated with this definition.
bool defined() const
Definition objects are nullable.
const std::vector< StorageDim > & storage_dims() const
The list and order of dimensions used to store this function.
A reference-counted handle to Halide's internal representation of a function.
FuncSchedule & schedule()
Get a handle to the function-specific schedule for the purpose of modifying it.
const std::vector< std::string > & args() const
Get the pure arguments.
A base class for passes over the IR which modify it (e.g.
A schedule for a single stage of a Halide pipeline.
A reference to a site in a Halide statement at the top of the body of a particular for loop.
A handle on the output buffer of a pipeline.
A reference-counted handle to a parameter to a halide pipeline.
A class representing a Halide pipeline.
A multi-dimensional domain over which to iterate.
A reduction variable represents a single dimension of a reduction domain (RDom).
const std::string & name() const
The name of this reduction variable.
A Realization is a vector of references to existing Buffer objects.
A single definition of a Func.
Stage & prefetch(const Func &f, const VarOrRVar &at, const VarOrRVar &from, Expr offset=1, PrefetchBoundStrategy strategy=PrefetchBoundStrategy::GuardWithIf)
std::string name() const
Return the name of this stage, e.g.
Stage & gpu_tile(const VarOrRVar &x, const VarOrRVar &y, const VarOrRVar &bx, const VarOrRVar &by, const VarOrRVar &tx, const VarOrRVar &ty, const Expr &x_size, const Expr &y_size, TailStrategy tail=TailStrategy::Auto, DeviceAPI device_api=DeviceAPI::Default_GPU)
Stage & tile(const std::vector< VarOrRVar > &previous, const std::vector< VarOrRVar > &inners, const std::vector< Expr > &factors, TailStrategy tail=TailStrategy::Auto)
Stage & gpu_tile(const VarOrRVar &x, const VarOrRVar &tx, const Expr &x_size, TailStrategy tail=TailStrategy::Auto, DeviceAPI device_api=DeviceAPI::Default_GPU)
Stage & gpu_threads(const VarOrRVar &thread_x, const VarOrRVar &thread_y, DeviceAPI device_api=DeviceAPI::Default_GPU)
HALIDE_NO_USER_CODE_INLINE std::enable_if< Internal::all_are_convertible< VarOrRVar, Args... >::value, Stage & >::type reorder(const VarOrRVar &x, const VarOrRVar &y, Args &&...args)
Stage & compute_with(const Stage &s, const VarOrRVar &var, const std::vector< std::pair< VarOrRVar, LoopAlignStrategy > > &align)
Func rfactor(const RVar &r, const Var &v)
Stage & gpu(const VarOrRVar &block_x, const VarOrRVar &thread_x, DeviceAPI device_api=DeviceAPI::Default_GPU)
Stage & vectorize(const VarOrRVar &var)
Stage & never_partition(const std::vector< VarOrRVar > &vars)
Stage & gpu_lanes(const VarOrRVar &thread_x, DeviceAPI device_api=DeviceAPI::Default_GPU)
Stage & unroll(const VarOrRVar &var)
Stage & compute_with(LoopLevel loop_level, const std::vector< std::pair< VarOrRVar, LoopAlignStrategy > > &align)
Schedule the iteration over this stage to be fused with another stage 's' from outermost loop to a gi...
Stage & gpu_tile(const VarOrRVar &x, const VarOrRVar &y, const VarOrRVar &z, const VarOrRVar &tx, const VarOrRVar &ty, const VarOrRVar &tz, const Expr &x_size, const Expr &y_size, const Expr &z_size, TailStrategy tail=TailStrategy::Auto, DeviceAPI device_api=DeviceAPI::Default_GPU)
Stage & gpu_tile(const VarOrRVar &x, const VarOrRVar &bx, const VarOrRVar &tx, const Expr &x_size, TailStrategy tail=TailStrategy::Auto, DeviceAPI device_api=DeviceAPI::Default_GPU)
Func rfactor(std::vector< std::pair< RVar, Var > > preserved)
Calling rfactor() on an associative update definition a Func will split the update into an intermedia...
Stage & allow_race_conditions()
Stage & tile(const VarOrRVar &x, const VarOrRVar &y, const VarOrRVar &xi, const VarOrRVar &yi, const Expr &xfactor, const Expr &yfactor, TailStrategy tail=TailStrategy::Auto)
Stage & parallel(const VarOrRVar &var, const Expr &task_size, TailStrategy tail=TailStrategy::Auto)
Stage & rename(const VarOrRVar &old_name, const VarOrRVar &new_name)
Stage & tile(const std::vector< VarOrRVar > &previous, const std::vector< VarOrRVar > &outers, const std::vector< VarOrRVar > &inners, const std::vector< Expr > &factors, TailStrategy tail=TailStrategy::Auto)
Stage & gpu_single_thread(DeviceAPI device_api=DeviceAPI::Default_GPU)
Stage & partition(const VarOrRVar &var, Partition partition_policy)
Stage & unroll(const VarOrRVar &var, const Expr &factor, TailStrategy tail=TailStrategy::Auto)
Stage specialize(const Expr &condition)
Stage & prefetch(const T &image, const VarOrRVar &at, const VarOrRVar &from, Expr offset=1, PrefetchBoundStrategy strategy=PrefetchBoundStrategy::GuardWithIf)
Stage & gpu_threads(const VarOrRVar &thread_x, const VarOrRVar &thread_y, const VarOrRVar &thread_z, DeviceAPI device_api=DeviceAPI::Default_GPU)
Stage & prefetch(const Parameter ¶m, const VarOrRVar &at, const VarOrRVar &from, Expr offset=1, PrefetchBoundStrategy strategy=PrefetchBoundStrategy::GuardWithIf)
Stage & reorder(const std::vector< VarOrRVar > &vars)
Stage & gpu(const VarOrRVar &block_x, const VarOrRVar &block_y, const VarOrRVar &block_z, const VarOrRVar &thread_x, const VarOrRVar &thread_y, const VarOrRVar &thread_z, DeviceAPI device_api=DeviceAPI::Default_GPU)
Stage(Internal::Function f, Internal::Definition d, size_t stage_index)
Stage & gpu_blocks(const VarOrRVar &block_x, const VarOrRVar &block_y, const VarOrRVar &block_z, DeviceAPI device_api=DeviceAPI::Default_GPU)
Stage & tile(const std::vector< VarOrRVar > &previous, const std::vector< VarOrRVar > &outers, const std::vector< VarOrRVar > &inners, const std::vector< Expr > &factors, const std::vector< TailStrategy > &tails)
Stage & compute_with(LoopLevel loop_level, LoopAlignStrategy align=LoopAlignStrategy::Auto)
Stage & parallel(const VarOrRVar &var)
const Internal::StageSchedule & get_schedule() const
Return the current StageSchedule associated with this Stage.
Stage & serial(const VarOrRVar &var)
Stage & gpu_blocks(const VarOrRVar &block_x, DeviceAPI device_api=DeviceAPI::Default_GPU)
Stage & fuse(const VarOrRVar &inner, const VarOrRVar &outer, const VarOrRVar &fused)
HALIDE_NO_USER_CODE_INLINE std::enable_if< Internal::all_are_convertible< VarOrRVar, Args... >::value, Stage & >::type never_partition(const VarOrRVar &x, Args &&...args)
Stage & vectorize(const VarOrRVar &var, const Expr &factor, TailStrategy tail=TailStrategy::Auto)
Stage & gpu(const VarOrRVar &block_x, const VarOrRVar &block_y, const VarOrRVar &thread_x, const VarOrRVar &thread_y, DeviceAPI device_api=DeviceAPI::Default_GPU)
Stage & compute_with(const Stage &s, const VarOrRVar &var, LoopAlignStrategy align=LoopAlignStrategy::Auto)
void specialize_fail(const std::string &message)
HALIDE_NO_USER_CODE_INLINE std::enable_if< Internal::all_are_convertible< VarOrRVar, Args... >::value, Stage & >::type always_partition(const VarOrRVar &x, Args &&...args)
Stage & tile(const VarOrRVar &x, const VarOrRVar &y, const VarOrRVar &xo, const VarOrRVar &yo, const VarOrRVar &xi, const VarOrRVar &yi, const Expr &xfactor, const Expr &yfactor, TailStrategy tail=TailStrategy::Auto)
Stage & gpu_blocks(const VarOrRVar &block_x, const VarOrRVar &block_y, DeviceAPI device_api=DeviceAPI::Default_GPU)
Stage & hexagon(const VarOrRVar &x=Var::outermost())
Stage & split(const VarOrRVar &old, const VarOrRVar &outer, const VarOrRVar &inner, const Expr &factor, TailStrategy tail=TailStrategy::Auto)
Scheduling calls that control how the domain of this stage is traversed.
Stage & always_partition_all()
Stage & never_partition_all()
Stage & atomic(bool override_associativity_test=false)
Stage & gpu_threads(const VarOrRVar &thread_x, DeviceAPI device_api=DeviceAPI::Default_GPU)
std::string dump_argument_list() const
Return a string describing the current var list taking into account all the splits,...
Stage & gpu_tile(const VarOrRVar &x, const VarOrRVar &y, const VarOrRVar &tx, const VarOrRVar &ty, const Expr &x_size, const Expr &y_size, TailStrategy tail=TailStrategy::Auto, DeviceAPI device_api=DeviceAPI::Default_GPU)
Stage & gpu_tile(const VarOrRVar &x, const VarOrRVar &y, const VarOrRVar &z, const VarOrRVar &bx, const VarOrRVar &by, const VarOrRVar &bz, const VarOrRVar &tx, const VarOrRVar &ty, const VarOrRVar &tz, const Expr &x_size, const Expr &y_size, const Expr &z_size, TailStrategy tail=TailStrategy::Auto, DeviceAPI device_api=DeviceAPI::Default_GPU)
void unscheduled()
Assert that this stage has intentionally been given no schedule, and suppress the warning about unsch...
Stage & always_partition(const std::vector< VarOrRVar > &vars)
Create a small array of Exprs for defining and calling functions with multiple outputs.
A Halide variable, to be used when defining functions.
const std::string & name() const
Get the name of a Var.
static Var outermost()
A Var that represents the location outside the outermost loop.
void schedule_scalar(Func f)
std::vector< Var > make_argument_list(int dimensionality)
Make a list of unique arguments for definitions with unnamed arguments.
void assign_results(Realization &r, int idx, Last last)
void check_types(const Tuple &t, int idx)
ForType
An enum describing a type of loop traversal.
This file defines the class FunctionDAG, which is our representation of a Halide pipeline,...
@ Internal
Not visible externally, similar to 'static' linkage in C.
Type type_of()
Construct the halide equivalent of a C type.
PrefetchBoundStrategy
Different ways to handle accesses outside the original extents in a prefetch.
@ GuardWithIf
Guard the prefetch with if-guards that ignores the prefetch if any of the prefetched region ever goes...
HALIDE_NO_USER_CODE_INLINE T evaluate_may_gpu(const Expr &e)
JIT-Compile and run enough code to evaluate a Halide expression.
TailStrategy
Different ways to handle a tail case in a split when the factor does not provably divide the extent.
@ Auto
For pure definitions use ShiftInwards.
LoopAlignStrategy
Different ways to handle the case when the start/end of the loops of stages computed with (fused) are...
@ Auto
By default, LoopAlignStrategy is set to NoAlign.
Expr min(const FuncRef &a, const FuncRef &b)
Explicit overloads of min and max for FuncRef.
NameMangling
An enum to specify calling convention for extern stages.
@ Default
Match whatever is specified in the Target.
Target get_jit_target_from_environment()
Return the target that Halide will use for jit-compilation.
DeviceAPI
An enum describing a type of device API.
@ Host
Used to denote for loops that run on the same device as the containing code.
Target get_target_from_environment()
Return the target that Halide will use.
StmtOutputFormat
Used to determine if the output printed to file should be as a normal string or as an HTML file which...
std::vector< Range > Region
A multi-dimensional box.
Expr max(const FuncRef &a, const FuncRef &b)
MemoryType
An enum describing different address spaces to be used with Func::store_in.
Partition
Different ways to handle loops with a potentially optimizable boundary conditions.
HALIDE_NO_USER_CODE_INLINE T evaluate(JITUserContext *ctx, const Expr &e)
JIT-Compile and run enough code to evaluate a Halide expression.
A fragment of Halide syntax.
HALIDE_ALWAYS_INLINE Type type() const
Get the type of this expression node.
An argument to an extern-defined Func.
A set of custom overrides of runtime functions.
A context to be passed to Pipeline::realize.
A struct representing a target machine and os to generate code for.
bool has_gpu_feature() const
Is a fully feature GPU compute runtime enabled? I.e.
bool has_feature(Feature f) const
Types in the halide type system.
A class that can represent Vars or RVars.
VarOrRVar(const std::string &n, bool r)
VarOrRVar(const ImplicitVar< N > &u)
const std::string & name() const