docs/_schedule_8h_source.html

#ifndef HALIDE_SCHEDULE_H

#define HALIDE_SCHEDULE_H


/** \file

 * Defines the internal representation of the schedule for a function

 */


#include <map>

#include <string>

#include <utility>

#include <vector>


#include "DeviceAPI.h"

#include "Expr.h"

#include "FunctionPtr.h"

#include "LoopPartitioningDirective.h"

#include "Parameter.h"

#include "PrefetchDirective.h"


namespace Halide {


class Func;

struct VarOrRVar;


namespace Internal {

class Function;

struct FunctionContents;

struct LoopLevelContents;

}  // namespace Internal


/** Different ways to handle a tail case in a split when the
 * factor does not provably divide the extent. */
enum class TailStrategy {
    /** Round up the extent to be a multiple of the split
     * factor. Not legal for RVars, as it would change the meaning
     * of the algorithm. Pros: generates the simplest, fastest
     * code. Cons: if used on a stage that reads from the input or
     * writes to the output, constrains the input or output size
     * to be a multiple of the split factor. */
    RoundUp,

    /** Guard the inner loop with an if statement that prevents
     * evaluation beyond the original extent. Always legal. The if
     * statement is treated like a boundary condition, and
     * factored out into a loop epilogue if possible. Pros: no
     * redundant re-evaluation; does not constrain input or
     * output sizes. Cons: increases code size due to separate
     * tail-case handling; vectorization will scalarize in the tail
     * case to handle the if statement. */
    GuardWithIf,

    /** Guard the loads and stores in the loop with an if statement
     * that prevents evaluation beyond the original extent. Always
     * legal. The if statement is treated like a boundary condition,
     * and factored out into a loop epilogue if possible.
     * Pros: no redundant re-evaluation; does not constrain input or
     * output sizes. Cons: increases code size due to separate
     * tail-case handling. */
    Predicate,

    /** Guard the loads in the loop with an if statement that
     * prevents evaluation beyond the original extent. Only legal
     * for innermost splits. Not legal for RVars, as it would change
     * the meaning of the algorithm. The if statement is treated like
     * a boundary condition, and factored out into a loop epilogue if
     * possible.
     * Pros: does not constrain input sizes, output size constraints
     * are simpler than full predication. Cons: increases code size
     * due to separate tail-case handling, constrains the output size
     * to be a multiple of the split factor. */
    PredicateLoads,

    /** Guard the stores in the loop with an if statement that
     * prevents evaluation beyond the original extent. Only legal
     * for innermost splits. Not legal for RVars, as it would change
     * the meaning of the algorithm. The if statement is treated like
     * a boundary condition, and factored out into a loop epilogue if
     * possible.
     * Pros: does not constrain output sizes, input size constraints
     * are simpler than full predication. Cons: increases code size
     * due to separate tail-case handling, constrains the input size
     * to be a multiple of the split factor. */
    PredicateStores,

    /** Prevent evaluation beyond the original extent by shifting
     * the tail case inwards, re-evaluating some points near the
     * end. Only legal for pure variables in pure definitions. If
     * the inner loop is very simple, the tail case is treated
     * like a boundary condition and factored out into an
     * epilogue.
     *
     * This is a good trade-off between several factors. Like
     * RoundUp, it supports vectorization well, because the inner
     * loop is always a fixed size with no data-dependent
     * branching. It increases code size slightly for inner loops
     * due to the epilogue handling, but not for outer loops
     * (e.g. loops over tiles). If used on a stage that reads from
     * an input or writes to an output, this strategy only requires
     * that the input/output extent be at least the split factor,
     * instead of a multiple of the split factor as with RoundUp. */
    ShiftInwards,

    /** Equivalent to ShiftInwards, but protects values that would be
     * re-evaluated by loading the memory location that would be stored to,
     * modifying only the elements not contained within the overlap, and then
     * storing the blended result.
     *
     * This tail strategy is useful when you want to use ShiftInwards to
     * vectorize without a scalar tail, but are scheduling a stage where that
     * isn't legal (e.g. an update definition).
     *
     * Because this is a read-modify-write, this tail strategy cannot be
     * used on any dimension the stage is parallelized over, as it would cause
     * a race condition.
     */
    ShiftInwardsAndBlend,

    /** Equivalent to RoundUp, but protects values that would be written beyond
     * the end by loading the memory location that would be stored to,
     * modifying only the elements within the region being computed, and then
     * storing the blended result.
     *
     * This tail strategy is useful when vectorizing an update to some sub-region
     * of a larger Func. As with ShiftInwardsAndBlend, it can't be combined with
     * parallelism.
     */
    RoundUpAndBlend,

    /** For pure definitions use ShiftInwards. For pure vars in
     * update definitions use RoundUp. For RVars in update
     * definitions use GuardWithIf. */
    Auto
};


/** Different ways to handle the case where the starts/ends of the loops of
 * stages that are computed with each other (i.e. fused) are not aligned. */
enum class LoopAlignStrategy {
    /** Shift the start of the fused loops to align. */
    AlignStart,

    /** Shift the end of the fused loops to align. */
    AlignEnd,

    /** compute_with will make no attempt to align the start/end of the
     * fused loops. */
    NoAlign,

    /** Use the default strategy. By default, LoopAlignStrategy is set
     * to NoAlign. */
    Auto
};


/** A reference to a site in a Halide statement at the top of the

 * body of a particular for loop. Evaluating a region of a halide

 * function is done by generating a loop nest that spans its

 * dimensions. We schedule the inputs to that function by

 * recursively injecting realizations for them at particular sites

 * in this loop nest. A LoopLevel identifies such a site. The site

 * can either be a loop nest within all stages of a function

 * or it can refer to a loop nest within a particular function's

 * stage (initial definition or updates).

 *

 * Note that a LoopLevel is essentially a pointer to an underlying value;

 * all copies of a LoopLevel refer to the same site, so mutating one copy

 * (via the set() method) will effectively mutate all copies:

 \code

 Func f;

 Var x;

 LoopLevel a(f, x);

 // Both a and b refer to LoopLevel(f, x)

 LoopLevel b = a;

 // Now both a and b refer to LoopLevel::root()

 a.set(LoopLevel::root());

 \endcode

 * This is quite useful when splitting Halide code into utility libraries, as it allows

 * a library to schedule code according to a caller's specifications, even if the caller

 * hasn't fully defined its pipeline yet:

 \code

 Func demosaic(Func input,

              LoopLevel intermed_compute_at,

              LoopLevel intermed_store_at,

              LoopLevel output_compute_at) {

    Func intermed = ...;

    Func output = ...;

    intermed.compute_at(intermed_compute_at).store_at(intermed_store_at);

    output.compute_at(output_compute_at);

    return output;

 }


 void process() {

     // Note that these LoopLevels are all undefined when we pass them to demosaic()

     LoopLevel intermed_compute_at, intermed_store_at, output_compute_at;

     Func input = ...;

     Func demosaiced = demosaic(input, intermed_compute_at, intermed_store_at, output_compute_at);

     Func output = ...;


     // We need to ensure all LoopLevels have a well-defined value prior to lowering:

     intermed_compute_at.set(LoopLevel(output, y));

     intermed_store_at.set(LoopLevel(output, y));

     output_compute_at.set(LoopLevel(output, x));

 }

 \endcode

 */


class LoopLevel {
    // Handle to the underlying value. All copies of a LoopLevel refer to
    // the same contents, which is why set() on one copy is visible
    // through every other copy.
    Internal::IntrusivePtr<Internal::LoopLevelContents> contents;

    // Wrap an already-built contents pointer. Private: the pointer is
    // moved into this LoopLevel rather than copied.
    explicit LoopLevel(Internal::IntrusivePtr<Internal::LoopLevelContents> c)
        : contents(std::move(c)) {
    }

public:
    /** Return the index of the function stage associated with this loop level.
     * Asserts if undefined */
    int stage_index() const;

    /** Identify the loop nest corresponding to some dimension of some function.
     * stage_index defaults to -1. */
    // @{
    LoopLevel(const Internal::Function &f, const VarOrRVar &v, int stage_index = -1);
    LoopLevel(const Func &f, const VarOrRVar &v, int stage_index = -1);
    // @}

    /** Construct an undefined LoopLevel. Calling any method on an undefined
     * LoopLevel (other than set()) will assert. */
    LoopLevel();

    /** For deserialization only. Builds a LoopLevel directly from its
     * stored fields; locked defaults to false. */
    LoopLevel(const std::string &func_name, const std::string &var_name,
              bool is_rvar, int stage_index, bool locked = false);

    /** Construct a special LoopLevel value that implies
     * that a function should be inlined away. */
    static LoopLevel inlined();

    /** Construct a special LoopLevel value which represents the
     * location outside of all for loops. */
    static LoopLevel root();

    /** Mutate our contents to match the contents of 'other'. */
    void set(const LoopLevel &other);

    // All the public methods below this point are meant only for internal
    // use by Halide, rather than user code; hence, they are deliberately
    // documented with plain comments (rather than Doxygen) to avoid being
    // present in user documentation.

    // Lock this LoopLevel. Returns a reference to this LoopLevel so the
    // call can be chained.
    LoopLevel &lock();

    // Return the Func name. Asserts if the LoopLevel is_root() or is_inlined() or !defined().
    std::string func() const;

    // Return the VarOrRVar. Asserts if the LoopLevel is_root() or is_inlined() or !defined().
    VarOrRVar var() const;

    // Return true iff the LoopLevel is defined. (Only LoopLevels created
    // with the default ctor are undefined.)
    bool defined() const;

    // Test if a loop level corresponds to inlining the function.
    bool is_inlined() const;

    // Test if a loop level is 'root', which describes the site
    // outside of all for loops.
    bool is_root() const;

    // For serialization only. Do not use in other cases.
    int get_stage_index() const;

    // For serialization only. Do not use in other cases.
    std::string func_name() const;

    // For serialization only. Do not use in other cases.
    std::string var_name() const;

    // For serialization only. Do not use in other cases.
    bool is_rvar() const;

    // For serialization only. Do not use in other cases.
    bool locked() const;

    // Return a string of the form func.var -- note that this is safe
    // to call for root or inline LoopLevels, but asserts if !defined().
    std::string to_string() const;

    // Compare this loop level against the variable name of a for
    // loop, to see if this loop level refers to the site
    // immediately inside this loop. Asserts if !defined().
    bool match(const std::string &loop) const;

    // NOTE(review): undocumented in this header. Presumably the
    // LoopLevel-vs-LoopLevel analogue of match(loop) above, and looser
    // than operator== ("exactly the same") -- confirm against the
    // definition.
    bool match(const LoopLevel &other) const;

    // Check if two loop levels are exactly the same.
    bool operator==(const LoopLevel &other) const;

    // Negation of operator==.
    bool operator!=(const LoopLevel &other) const {
        return !(*this == other);
    }

private:
    // Internal validation helpers. Presumably these assert when the
    // LoopLevel is undefined and/or not locked, as their names suggest --
    // TODO confirm against the definitions.
    void check_defined() const;
    void check_locked() const;
    void check_defined_and_locked() const;
};


/** A LoopLevel bundled with the per-dimension alignment strategies to
 * use for the loops fused at that level. */
struct FuseLoopLevel {
    /** The loop level this fusion refers to. */
    LoopLevel level;

    /** Alignment strategy for each fused dimension, keyed by dimension
     * name. A dimension that has no entry here is aligned using the
     * default strategy (see \ref LoopAlignStrategy::Auto).
     */
    std::map<std::string, LoopAlignStrategy> align;

    /** Default state: a locked "inlined" LoopLevel and an empty
     * alignment map. */
    FuseLoopLevel()
        : level(LoopLevel::inlined().lock()),
          align() {}

    /** Build from an explicit loop level and alignment map; both are
     * copied. */
    FuseLoopLevel(const LoopLevel &level, const std::map<std::string, LoopAlignStrategy> &align)
        : level(level),
          align(align) {}
};


namespace Internal {


class IRMutator;

struct ReductionVariable;


/** One entry in a stage's list of splits. Despite the name, an entry may
 * also record a rename, a fuse, or an RVar purification (see SplitType);
 * these are kept in the same list as true splits so that the ordering
 * between them is respected. */
struct Split {
    /** The kind of transformation an entry describes.
     *
     * - SplitVar: old_var is split into outer and inner by factor.
     * - RenameVar: old_var is merely renamed to outer; this is not a
     *   split. inner should be ignored, and factor should be one.
     * - FuseVars: the opposite of a split; outer and inner are joined
     *   into old_var.
     * - PurifyRVar: the old_var RVar is replaced by the outer Var.
     *   inner should be ignored, and factor should be one.
     */
    enum SplitType {
        SplitVar = 0,
        RenameVar,
        FuseVars,
        PurifyRVar
    };

    /** Variable names involved; their roles depend on split_type. */
    std::string old_var;
    std::string outer;
    std::string inner;

    /** The split factor. */
    Expr factor;

    /** Is it required that the factor divides the extent of the old
     * var? True for splits of RVars. Forces the tail strategy to be
     * GuardWithIf. */
    bool exact;

    /** How to handle the tail when factor does not divide the extent. */
    TailStrategy tail;

    /** Which kind of transformation this entry is. */
    SplitType split_type;
};


/** Each Dim below has a dim_type, which tells you what
 * transformations are legal on it. When you combine two Dims of
 * distinct DimTypes (e.g. with Stage::fuse), the combined result has
 * the greater enum value of the two types. */
enum class DimType {
    /** This dim originated from a Var. You can evaluate a Func at
     * distinct values of this Var in any order over an interval
     * that's at least as large as the interval required. In pure
     * definitions you can even redundantly re-evaluate points. */
    PureVar = 0,

    /** The dim originated from an RVar. You can evaluate a Func at
     * distinct values of this RVar in any order (including in
     * parallel) over exactly the interval specified in the
     * RDom. PureRVars can also be reordered arbitrarily in the dims
     * list, as there are no data hazards between the evaluation of
     * the Func at distinct values of the RVar.
     *
     * The most common case where an RVar is considered pure is RVars
     * that are used in a way which obeys all the syntactic
     * constraints that a Var does, e.g:
     *
     \code
     RDom r(0, 100);
     f(r.x) = f(r.x) + 5;
     \endcode
     *
     * Other cases where RVars are pure are where the sites being
     * written to by the Func evaluated at one value of the RVar
     * couldn't possibly collide with the sites being written or read
     * by the Func at a distinct value of the RVar. For example, r.x
     * is pure in the following three definitions:
     *
     \code

     // This definition writes to even coordinates and reads from the
     // same site (which no other value of r.x is writing to) and odd
     // sites (which no other value of r.x is writing to):
     f(2*r.x) = max(f(2*r.x), f(2*r.x + 7));

     // This definition writes to scanline zero and reads from the
     // same site and scanline one:
     f(r.x, 0) += f(r.x, 1);

     // This definition reads and writes over non-overlapping ranges:
     f(r.x + 100) += f(r.x);
     \endcode
     *
     * To give two counterexamples, r.x is not pure in the following
     * definitions:
     *
     \code
     // The same site is written by distinct values of the RVar
     // (write-after-write hazard):
     f(r.x / 2) += f(r.x);

     // One value of r.x reads from a site that another value of r.x
     // is writing to (read-after-write hazard):
     f(r.x) += f(r.x + 1);
     \endcode
     */
    PureRVar,

    /** The dim originated from an RVar. You must evaluate a Func at
     * distinct values of this RVar in increasing order over precisely
     * the interval specified in the RDom. ImpureRVars may not be
     * reordered with respect to other ImpureRVars.
     *
     * All RVars are impure by default. Those for which we can prove
     * no data hazards exist get promoted to PureRVar. There are two
     * instances in which ImpureRVars may be parallelized or reordered
     * even in the presence of hazards:
     *
     * 1) In the case of an update definition that has been proven to be
     * an associative and commutative reduction, reordering of
     * ImpureRVars is allowed, and parallelizing them is allowed if
     * the update has been made atomic.
     *
     * 2) ImpureRVars can also be reordered and parallelized if
     * Func::allow_race_conditions() has been set. This is the escape
     * hatch for when there are no hazards but the checks above failed
     * to prove that (RDom::where can encode arbitrary facts about
     * non-linear integer arithmetic, which is undecidable), or for
     * when you don't actually care about the non-determinism
     * introduced by data hazards (e.g. in the algorithm HOGWILD!).
     */
    ImpureRVar,
};


/** One loop of a loop nest, as the schedule represents it. */
struct Dim {
    /** The loop variable's name. */
    std::string var;

    /** How the loop's values are traversed (e.g. unrolled, vectorized,
     * parallel). */
    ForType for_type;

    /** The device on which the loop body executes (e.g. Host, GPU,
     * Hexagon). */
    DeviceAPI device_api;

    /** Determines which transformations are legal on this loop (see
     * the DimType enum). */
    DimType dim_type;

    /** The loop partitioning strategy. */
    Partition partition_policy;

    /** True if this loop may be evaluated in any order (including in
     * parallel), i.e. there are no data hazards between evaluations of
     * the Func at distinct values of this var. */
    bool is_pure() const {
        switch (dim_type) {
        case DimType::PureVar:
        case DimType::PureRVar:
            return true;
        default:
            return false;
        }
    }

    /** True if this loop came from an RVar, in which case the loop
     * bounds are algorithmically meaningful. */
    bool is_rvar() const {
        const bool from_pure_rvar = (dim_type == DimType::PureRVar);
        const bool from_impure_rvar = (dim_type == DimType::ImpureRVar);
        return from_pure_rvar || from_impure_rvar;
    }

    /** True if multiple iterations of this loop could run at the same
     * time with reads and writes interleaved in arbitrary ways,
     * according to the memory model of the underlying compiler and
     * machine. */
    bool is_unordered_parallel() const {
        // Fully qualified so the call resolves to the free function on
        // ForType rather than to this member.
        return Halide::Internal::is_unordered_parallel(for_type);
    }

    /** True if multiple iterations of this loop could run at the same
     * time. Vectorized and GPULanes loop types are parallel but not
     * unordered: their iterations proceed together in lockstep, with
     * some well-defined outcome if there are hazards. */
    bool is_parallel() const {
        // Fully qualified for the same reason as is_unordered_parallel().
        return Halide::Internal::is_parallel(for_type);
    }
};


/** A declared bound on a loop, typically coming from Func::bound. */
struct Bound {
    /** Name of the loop var that is bounded. */
    std::string var;

    /** Declared min of the loop. May be undefined if Func::bound_extent
     * was used. */
    Expr min;

    /** Declared extent of the loop. */
    Expr extent;

    /** If defined, the number of iterations will be a multiple of
     * "modulus". Set by Func::align_bounds and Func::align_extent. */
    Expr modulus;

    /** If defined, the first iteration will be at a value congruent to
     * "remainder" modulo "modulus". Set by Func::align_bounds and
     * Func::align_extent. */
    Expr remainder;
};


/** Properties of one axis of the storage of a Func */
struct StorageDim {
    /** The var in the pure definition corresponding to this axis */
    std::string var;

    /** The bounds allocated (not computed) must be a multiple of
     * "alignment". Set by Func::align_storage. */
    Expr alignment;

    /** The bounds allocated (not computed). Set by Func::bound_storage. */
    Expr bound;

    /** If the Func is explicitly folded along this axis (with
     * Func::fold_storage) this gives the extent of the circular
     * buffer used. */
    Expr fold_factor;

    /** When folded, whether the circular buffer is used in increasing
     * order (true) or decreasing order (false).
     *
     * NOTE(review): there is no default member initializer here, so a
     * default-initialized StorageDim leaves this value indeterminate --
     * confirm that every construction site sets it or value-initializes
     * the struct. */
    bool fold_forward;
};


/** This represents two stages with fused loop nests from outermost to
 * a specific loop level. The loops to compute func_1(stage_1) are
 * fused with the loops to compute func_2(stage_2) from outermost to
 * loop level var_name and the computation from stage_1 of func_1
 * occurs first.
 */
struct FusedPair {
    /** Name of the function whose stage is computed first. */
    std::string func_1;

    /** Name of the function whose stage is computed second. */
    std::string func_2;

    /** Stage index within func_1. Zero-initialized so that a
     * default-constructed FusedPair never holds an indeterminate value
     * (operator== and operator< read this field). */
    size_t stage_1 = 0;

    /** Stage index within func_2. Zero-initialized for the same reason
     * as stage_1. */
    size_t stage_2 = 0;

    /** Name of the loop level down to which the two loop nests are fused. */
    std::string var_name;

    /** Construct an empty pair: empty names and both stage indices zero. */
    FusedPair() = default;

    /** Construct a pair fusing stage s1 of f1 with stage s2 of f2 down
     * to loop level var. */
    FusedPair(const std::string &f1, size_t s1, const std::string &f2,
              size_t s2, const std::string &var)
        : func_1(f1), func_2(f2), stage_1(s1), stage_2(s2), var_name(var) {
    }

    /** Two pairs are equal iff all five fields are equal. */
    bool operator==(const FusedPair &other) const {
        return (func_1 == other.func_1) && (func_2 == other.func_2) &&
               (stage_1 == other.stage_1) && (stage_2 == other.stage_2) &&
               (var_name == other.var_name);
    }

    /** Strict weak ordering, comparing lexicographically by func_1,
     * func_2, var_name, stage_1 and finally stage_2. Note that var_name
     * is compared before the stage indices. */
    bool operator<(const FusedPair &other) const {
        if (func_1 != other.func_1) {
            return func_1 < other.func_1;
        }
        if (func_2 != other.func_2) {
            return func_2 < other.func_2;
        }
        if (var_name != other.var_name) {
            return var_name < other.var_name;
        }
        if (stage_1 != other.stage_1) {
            return stage_1 < other.stage_1;
        }
        return stage_2 < other.stage_2;
    }
};


struct FuncScheduleContents;

struct StageScheduleContents;

struct FunctionContents;


/** A schedule for a Function of a Halide pipeline. This schedule is
 * applied to all stages of the Function. Right now this interface is
 * basically a struct, offering mutable access to its innards.
 * In the future it may become more encapsulated. */
class FuncSchedule {
    // Pointer to the schedule's state. The defaulted copy constructor
    // below copies only this pointer; use deep_copy() for an
    // independent schedule.
    IntrusivePtr<FuncScheduleContents> contents;

public:
    /** Wrap an existing contents pointer (moved into this object).
     * Not marked explicit, so an IntrusivePtr<FuncScheduleContents>
     * converts implicitly to a FuncSchedule. */
    FuncSchedule(IntrusivePtr<FuncScheduleContents> c)
        : contents(std::move(c)) {
    }

    /** Copy the handle; see the note on contents above. */
    FuncSchedule(const FuncSchedule &other) = default;

    /** Default constructor; defined out of line. */
    FuncSchedule();

    /** Return a deep copy of this FuncSchedule. It recursively deep copies all
     * called functions, schedules, specializations, and reduction domains. This
     * method takes a map of <old FunctionContents, deep-copied version> as
     * input, and uses the deep-copied FunctionContents from the map if one
     * exists, instead of creating a new deep copy, to avoid deep-copying the
     * same FunctionContents multiple times.
     */
    FuncSchedule deep_copy(
        std::map<FunctionPtr, FunctionPtr> &copied_map) const;

    /** This flag is set to true if the schedule is memoized. */
    // @{
    bool &memoized();
    bool memoized() const;
    // @}

    /** The eviction key attached to a memoized schedule.
     *
     * NOTE(review): the original comment read "This flag is set to true
     * if the schedule is memoized and has an attached eviction key", but
     * these accessors return an Expr, not a bool -- confirm the intended
     * wording. */
    // @{
    Expr &memoize_eviction_key();
    Expr memoize_eviction_key() const;
    // @}

    /** Is the production of this Function done asynchronously */
    bool &async();
    bool async() const;

    // NOTE(review): undocumented in this header; presumably the
    // ring-buffer setting for this Func -- TODO confirm. Also note that,
    // unlike every other accessor pair in this class, the const overload
    // returns a mutable Expr& rather than a value or const reference,
    // which lets callers modify the schedule through a const
    // FuncSchedule. Confirm whether that is intended.
    Expr &ring_buffer();
    Expr &ring_buffer() const;

    /** The list and order of dimensions used to store this
     * function. The first dimension in the vector corresponds to the
     * innermost dimension for storage (i.e. which dimension is
     * tightly packed in memory) */
    // @{
    const std::vector<StorageDim> &storage_dims() const;
    std::vector<StorageDim> &storage_dims();
    // @}

    /** The memory type (heap/stack/shared/etc) used to back this Func. */
    // @{
    MemoryType memory_type() const;
    MemoryType &memory_type();
    // @}

    /** You may explicitly bound some of the dimensions of a function,
     * or constrain them to lie on multiples of a given factor. See
     * \ref Func::bound and \ref Func::align_bounds and \ref Func::align_extent. */
    // @{
    const std::vector<Bound> &bounds() const;
    std::vector<Bound> &bounds();
    // @}

    /** You may explicitly specify an estimate of some of the function
     * dimensions. See \ref Func::set_estimate */
    // @{
    const std::vector<Bound> &estimates() const;
    std::vector<Bound> &estimates();
    // @}

    /** Mark calls of a function by 'f' to be replaced with its identity
     * wrapper or clone during the lowering stage. If the string 'f' is empty,
     * it means replace all calls to the function by all other functions
     * (excluding itself) in the pipeline with the global identity wrapper.
     * See \ref Func::in and \ref Func::clone_in for more details. */
    // @{
    const std::map<std::string, Internal::FunctionPtr> &wrappers() const;
    std::map<std::string, Internal::FunctionPtr> &wrappers();
    void add_wrapper(const std::string &f,
                     const Internal::FunctionPtr &wrapper);
    // @}

    /** At what sites should we inject the allocation and the
     * computation of this function? The store_level must be outside
     * of or equal to the compute_level. If the compute_level is
     * inline, the store_level is meaningless. See \ref Func::store_at
     * and \ref Func::compute_at
     *
     * NOTE(review): hoist_storage_level is grouped here but is not
     * described by the text above -- confirm its meaning against the
     * corresponding Func scheduling directive. */
    // @{
    const LoopLevel &store_level() const;
    const LoopLevel &compute_level() const;
    const LoopLevel &hoist_storage_level() const;
    LoopLevel &store_level();
    LoopLevel &compute_level();
    LoopLevel &hoist_storage_level();
    // @}

    /** Pass an IRVisitor through to all Exprs referenced in the
     * Schedule. */
    void accept(IRVisitor *) const;

    /** Pass an IRMutator through to all Exprs referenced in the
     * Schedule. */
    void mutate(IRMutator *);
};


/** A schedule for a single stage of a Halide pipeline. Right now this
 * interface is basically a struct, offering mutable access to its
 * innards. In the future it may become more encapsulated. */
class StageSchedule {
    // Pointer to the schedule's state. The defaulted copy constructor
    // below copies only this pointer.
    IntrusivePtr<StageScheduleContents> contents;

public:
    /** Wrap an existing contents pointer (moved into this object).
     * Not marked explicit, so an IntrusivePtr<StageScheduleContents>
     * converts implicitly to a StageSchedule. */
    StageSchedule(IntrusivePtr<StageScheduleContents> c)
        : contents(std::move(c)) {
    }

    /** Copy the handle; see the note on contents above. */
    StageSchedule(const StageSchedule &other) = default;

    /** Default constructor; defined out of line. */
    StageSchedule();

    /** Construct a schedule from explicitly supplied components. The
     * parameter names correspond to the accessors of the same names
     * declared below. */
    StageSchedule(const std::vector<ReductionVariable> &rvars, const std::vector<Split> &splits,
                  const std::vector<Dim> &dims, const std::vector<PrefetchDirective> &prefetches,
                  const FuseLoopLevel &fuse_level, const std::vector<FusedPair> &fused_pairs,
                  bool touched, bool allow_race_conditions, bool atomic, bool override_atomic_associativity_test);

    /** Return a copy of this StageSchedule. */
    StageSchedule get_copy() const;

    /** This flag is set to true if the dims list has been manipulated
     * by the user (or if a ScheduleHandle was created that could have
     * been used to manipulate it). It controls the warning that
     * occurs if you schedule the vars of the pure step but not the
     * update steps. */
    // @{
    bool &touched();
    bool touched() const;
    // @}

    /** RVars of the reduction domain associated with this schedule, if
     * there is one. */
    // @{
    const std::vector<ReductionVariable> &rvars() const;
    std::vector<ReductionVariable> &rvars();
    // @}

    /** The traversal of the domain of a function can have some of its
     * dimensions split into sub-dimensions. See \ref Func::split */
    // @{
    const std::vector<Split> &splits() const;
    std::vector<Split> &splits();
    // @}

    /** The list and ordering of dimensions used to evaluate this
     * function, after all splits have taken place. The first
     * dimension in the vector corresponds to the innermost for loop,
     * and the last is the outermost. Also specifies what type of for
     * loop to use for each dimension. Does not specify the bounds on
     * each dimension. These get inferred from how the function is
     * used, what the splits are, and any optional bounds in the list below. */
    // @{
    const std::vector<Dim> &dims() const;
    std::vector<Dim> &dims();
    // @}

    /** You may perform prefetching in some of the dimensions of a
     * function. See \ref Func::prefetch */
    // @{
    const std::vector<PrefetchDirective> &prefetches() const;
    std::vector<PrefetchDirective> &prefetches();
    // @}

    /** Innermost loop level of fused loop nest for this function stage.
     * Fusion runs from outermost to this loop level. The stages being fused
     * should not have a producer/consumer relationship. See
     * \ref Func::compute_with */
    // @{
    const FuseLoopLevel &fuse_level() const;
    FuseLoopLevel &fuse_level();
    // @}

    /** List of function stages that are to be fused with this function stage
     * from the outermost loop to a certain loop level. Those function stages
     * are to be computed AFTER this function stage at the last fused loop level.
     * This list is populated when realization_order() is called. See
     * \ref Func::compute_with */
    // @{
    const std::vector<FusedPair> &fused_pairs() const;
    std::vector<FusedPair> &fused_pairs();
    // @}

    /** Are race conditions permitted? */
    // @{
    bool allow_race_conditions() const;
    bool &allow_race_conditions();
    // @}

    /** Use atomic update? */
    // @{
    bool atomic() const;
    bool &atomic();
    // @}

    /** Atomic updates are only allowed on associative reductions.
     *  We try to prove the associativity, but the user can override
     *  the associativity test and suppress the compiler error if the
     *  prover fails to recognize the associativity or the user does not
     *  care. */
    // @{
    bool override_atomic_associativity_test() const;
    bool &override_atomic_associativity_test();
    // @}

    /** Pass an IRVisitor through to all Exprs referenced in the
     * Schedule. */
    void accept(IRVisitor *) const;

    /** Pass an IRMutator through to all Exprs referenced in the
     * Schedule. */
    void mutate(IRMutator *);
};


}  // namespace Internal

}  // namespace Halide


#endif

DeviceAPI.h
Defines DeviceAPI.

Expr.h
Base classes for Halide expressions (Halide::Expr) and statements (Halide::Internal::Stmt)

FunctionPtr.h

LoopPartitioningDirective.h
Defines the Partition enum.

Parameter.h
Defines the internal representation of parameters to Halide pipelines.

PrefetchDirective.h
Defines the PrefetchDirective struct.

Halide::Func
A Halide function.
Definition Func.h:700

Halide::Internal::FuncSchedule
A schedule for a Function of a Halide pipeline.
Definition Schedule.h:577

Halide::Internal::FuncSchedule::memoize_eviction_key
Expr & memoize_eviction_key()
This flag is set to true if the schedule is memoized and has an attached eviction key.

Halide::Internal::FuncSchedule::ring_buffer
Expr & ring_buffer() const

Halide::Internal::FuncSchedule::add_wrapper
void add_wrapper(const std::string &f, const Internal::FunctionPtr &wrapper)

Halide::Internal::FuncSchedule::estimates
const std::vector< Bound > & estimates() const
You may explicitly specify an estimate of some of the function dimensions.

Halide::Internal::FuncSchedule::bounds
const std::vector< Bound > & bounds() const
You may explicitly bound some of the dimensions of a function, or constrain them to lie on multiples ...

Halide::Internal::FuncSchedule::mutate
void mutate(IRMutator *)
Pass an IRMutator through to all Exprs referenced in the Schedule.

Halide::Internal::FuncSchedule::memoize_eviction_key
Expr memoize_eviction_key() const

Halide::Internal::FuncSchedule::async
bool & async()
Is the production of this Function done asynchronously.

Halide::Internal::FuncSchedule::hoist_storage_level
const LoopLevel & hoist_storage_level() const

Halide::Internal::FuncSchedule::hoist_storage_level
LoopLevel & hoist_storage_level()

Halide::Internal::FuncSchedule::accept
void accept(IRVisitor *) const
Pass an IRVisitor through to all Exprs referenced in the Schedule.

Halide::Internal::FuncSchedule::store_level
const LoopLevel & store_level() const
At what sites should we inject the allocation and the computation of this function?...

Halide::Internal::FuncSchedule::memory_type
MemoryType & memory_type()

Halide::Internal::FuncSchedule::FuncSchedule
FuncSchedule()

Halide::Internal::FuncSchedule::wrappers
const std::map< std::string, Internal::FunctionPtr > & wrappers() const
Mark calls of a function by 'f' to be replaced with its identity wrapper or clone during the lowering...

Halide::Internal::FuncSchedule::async
bool async() const

Halide::Internal::FuncSchedule::store_level
LoopLevel & store_level()

Halide::Internal::FuncSchedule::memory_type
MemoryType memory_type() const
The memory type (heap/stack/shared/etc) used to back this Func.

Halide::Internal::FuncSchedule::storage_dims
std::vector< StorageDim > & storage_dims()

Halide::Internal::FuncSchedule::FuncSchedule
FuncSchedule(const FuncSchedule &other)=default

Halide::Internal::FuncSchedule::ring_buffer
Expr & ring_buffer()

Halide::Internal::FuncSchedule::FuncSchedule
FuncSchedule(IntrusivePtr< FuncScheduleContents > c)
Definition Schedule.h:581

Halide::Internal::FuncSchedule::wrappers
std::map< std::string, Internal::FunctionPtr > & wrappers()

Halide::Internal::FuncSchedule::deep_copy
FuncSchedule deep_copy(std::map< FunctionPtr, FunctionPtr > &copied_map) const
Return a deep copy of this FuncSchedule.

Halide::Internal::FuncSchedule::memoized
bool memoized() const

Halide::Internal::FuncSchedule::memoized
bool & memoized()
This flag is set to true if the schedule is memoized.

Halide::Internal::FuncSchedule::compute_level
const LoopLevel & compute_level() const

Halide::Internal::FuncSchedule::compute_level
LoopLevel & compute_level()

Halide::Internal::FuncSchedule::estimates
std::vector< Bound > & estimates()

Halide::Internal::FuncSchedule::storage_dims
const std::vector< StorageDim > & storage_dims() const
The list and order of dimensions used to store this function.

Halide::Internal::FuncSchedule::bounds
std::vector< Bound > & bounds()

Halide::Internal::Function
A reference-counted handle to Halide's internal representation of a function.
Definition Function.h:39

Halide::Internal::IRMutator
A base class for passes over the IR which modify it (e.g.
Definition IRMutator.h:26

Halide::Internal::IRVisitor
A base class for algorithms that need to recursively walk over the IR.
Definition IRVisitor.h:19

Halide::Internal::StageSchedule
A schedule for a single stage of a Halide pipeline.
Definition Schedule.h:685

Halide::Internal::StageSchedule::StageSchedule
StageSchedule(IntrusivePtr< StageScheduleContents > c)
Definition Schedule.h:689

Halide::Internal::StageSchedule::prefetches
std::vector< PrefetchDirective > & prefetches()

Halide::Internal::StageSchedule::rvars
std::vector< ReductionVariable > & rvars()

Halide::Internal::StageSchedule::rvars
const std::vector< ReductionVariable > & rvars() const
RVars of reduction domain associated with this schedule if there is any.

Halide::Internal::StageSchedule::get_copy
StageSchedule get_copy() const
Return a copy of this StageSchedule.

Halide::Internal::StageSchedule::touched
bool & touched()
This flag is set to true if the dims list has been manipulated by the user (or if a ScheduleHandle wa...

Halide::Internal::StageSchedule::StageSchedule
StageSchedule()

Halide::Internal::StageSchedule::touched
bool touched() const

Halide::Internal::StageSchedule::fused_pairs
std::vector< FusedPair > & fused_pairs()

Halide::Internal::StageSchedule::fused_pairs
const std::vector< FusedPair > & fused_pairs() const
List of function stages that are to be fused with this function stage from the outermost loop to a ce...

Halide::Internal::StageSchedule::splits
std::vector< Split > & splits()

Halide::Internal::StageSchedule::allow_race_conditions
bool allow_race_conditions() const
Are race conditions permitted?

Halide::Internal::StageSchedule::override_atomic_associativity_test
bool & override_atomic_associativity_test()

Halide::Internal::StageSchedule::StageSchedule
StageSchedule(const std::vector< ReductionVariable > &rvars, const std::vector< Split > &splits, const std::vector< Dim > &dims, const std::vector< PrefetchDirective > &prefetches, const FuseLoopLevel &fuse_level, const std::vector< FusedPair > &fused_pairs, bool touched, bool allow_race_conditions, bool atomic, bool override_atomic_associativity_test)

Halide::Internal::StageSchedule::fuse_level
const FuseLoopLevel & fuse_level() const
Innermost loop level of fused loop nest for this function stage.

Halide::Internal::StageSchedule::atomic
bool atomic() const
Use atomic update?

Halide::Internal::StageSchedule::splits
const std::vector< Split > & splits() const
The traversal of the domain of a function can have some of its dimensions split into sub-dimensions.

Halide::Internal::StageSchedule::dims
std::vector< Dim > & dims()

Halide::Internal::StageSchedule::dims
const std::vector< Dim > & dims() const
The list and ordering of dimensions used to evaluate this function, after all splits have taken place...

Halide::Internal::StageSchedule::fuse_level
FuseLoopLevel & fuse_level()

Halide::Internal::StageSchedule::mutate
void mutate(IRMutator *)
Pass an IRMutator through to all Exprs referenced in the Schedule.

Halide::Internal::StageSchedule::atomic
bool & atomic()

Halide::Internal::StageSchedule::accept
void accept(IRVisitor *) const
Pass an IRVisitor through to all Exprs referenced in the Schedule.

Halide::Internal::StageSchedule::prefetches
const std::vector< PrefetchDirective > & prefetches() const
You may perform prefetching in some of the dimensions of a function.

Halide::Internal::StageSchedule::allow_race_conditions
bool & allow_race_conditions()

Halide::Internal::StageSchedule::override_atomic_associativity_test
bool override_atomic_associativity_test() const
Atomic updates are only allowed on associative reductions.

Halide::Internal::StageSchedule::StageSchedule
StageSchedule(const StageSchedule &other)=default

Halide::LoopLevel
A reference to a site in a Halide statement at the top of the body of a particular for loop.
Definition Schedule.h:203

Halide::LoopLevel::var
VarOrRVar var() const

Halide::LoopLevel::to_string
std::string to_string() const

Halide::LoopLevel::root
static LoopLevel root()
Construct a special LoopLevel value which represents the location outside of all for loops.

Halide::LoopLevel::LoopLevel
LoopLevel(const Internal::Function &f, const VarOrRVar &v, int stage_index=-1)
Identify the loop nest corresponding to some dimension of some function.

Halide::LoopLevel::inlined
static LoopLevel inlined()
Construct a special LoopLevel value that implies that a function should be inlined away.

Halide::LoopLevel::get_stage_index
int get_stage_index() const

Halide::LoopLevel::LoopLevel
LoopLevel(const Func &f, const VarOrRVar &v, int stage_index=-1)

Halide::LoopLevel::operator==
bool operator==(const LoopLevel &other) const

Halide::LoopLevel::func
std::string func() const

Halide::LoopLevel::stage_index
int stage_index() const
Return the index of the function stage associated with this loop level.

Halide::LoopLevel::LoopLevel
LoopLevel()
Construct an undefined LoopLevel.

Halide::LoopLevel::var_name
std::string var_name() const

Halide::LoopLevel::set
void set(const LoopLevel &other)
Mutate our contents to match the contents of 'other'.

Halide::LoopLevel::match
bool match(const LoopLevel &other) const

Halide::LoopLevel::locked
bool locked() const

Halide::LoopLevel::is_root
bool is_root() const

Halide::LoopLevel::LoopLevel
LoopLevel(const std::string &func_name, const std::string &var_name, bool is_rvar, int stage_index, bool locked=false)
For deserialization only.

Halide::LoopLevel::is_inlined
bool is_inlined() const

Halide::LoopLevel::operator!=
bool operator!=(const LoopLevel &other) const
Definition Schedule.h:294

Halide::LoopLevel::func_name
std::string func_name() const

Halide::LoopLevel::defined
bool defined() const

Halide::LoopLevel::match
bool match(const std::string &loop) const

Halide::LoopLevel::is_rvar
bool is_rvar() const

Halide::LoopLevel::lock
LoopLevel & lock()

Halide::Internal::ArgInfoKind::Function
@ Function

Halide::Internal::DimType
DimType
Each Dim below has a dim_type, which tells you what transformations are legal on it.
Definition Schedule.h:357

Halide::Internal::DimType::ImpureRVar
@ ImpureRVar
The dim originated from an RVar.

Halide::Internal::DimType::PureRVar
@ PureRVar
The dim originated from an RVar.

Halide::Internal::DimType::PureVar
@ PureVar
This dim originated from a Var.

Halide::Internal::ForType
ForType
An enum describing a type of loop traversal.
Definition Expr.h:406

Halide::Internal::is_unordered_parallel
bool is_unordered_parallel(ForType for_type)
Check if for_type executes for loop iterations in parallel and unordered.

Halide::Internal::is_parallel
bool is_parallel(ForType for_type)
Returns true if for_type executes for loop iterations in parallel.

Halide
This file defines the class FunctionDAG, which is our representation of a Halide pipeline,...
Definition AbstractGenerator.h:19

Halide::LinkageType::Internal
@ Internal
Not visible externally, similar to 'static' linkage in C.

Halide::PrefetchBoundStrategy::GuardWithIf
@ GuardWithIf
Guard the prefetch with if-guards that ignore the prefetch if any of the prefetched region ever goes...

Halide::TailStrategy
TailStrategy
Different ways to handle a tail case in a split when the factor does not provably divide the extent.
Definition Schedule.h:33

Halide::TailStrategy::RoundUp
@ RoundUp
Round up the extent to be a multiple of the split factor.

Halide::TailStrategy::RoundUpAndBlend
@ RoundUpAndBlend
Equivalent to RoundUp, but protects values that would be written beyond the end by loading the memor...

Halide::TailStrategy::Predicate
@ Predicate
Guard the loads and stores in the loop with an if statement that prevents evaluation beyond the origi...

Halide::TailStrategy::PredicateStores
@ PredicateStores
Guard the stores in the loop with an if statement that prevents evaluation beyond the original extent...

Halide::TailStrategy::ShiftInwardsAndBlend
@ ShiftInwardsAndBlend
Equivalent to ShiftInwards, but protects values that would be re-evaluated by loading the memory loca...

Halide::TailStrategy::ShiftInwards
@ ShiftInwards
Prevent evaluation beyond the original extent by shifting the tail case inwards, re-evaluating some p...

Halide::TailStrategy::PredicateLoads
@ PredicateLoads
Guard the loads in the loop with an if statement that prevents evaluation beyond the original extent.

Halide::LoopAlignStrategy
LoopAlignStrategy
Different ways to handle the case when the start/end of the loops of stages computed with (fused) are...
Definition Schedule.h:137

Halide::LoopAlignStrategy::NoAlign
@ NoAlign
compute_with will make no attempt to align the start/end of the fused loops.

Halide::LoopAlignStrategy::AlignEnd
@ AlignEnd
Shift the end of the fused loops to align.

Halide::LoopAlignStrategy::AlignStart
@ AlignStart
Shift the start of the fused loops to align.

Halide::DeviceAPI
DeviceAPI
An enum describing a type of device API.
Definition DeviceAPI.h:15

Halide::MemoryType
MemoryType
An enum describing different address spaces to be used with Func::store_in.
Definition Expr.h:353

Halide::MemoryType::Auto
@ Auto
Let Halide select a storage type automatically.

Halide::Partition
Partition
Different ways to handle loops with potentially optimizable boundary conditions.
Definition LoopPartitioningDirective.h:16

Halide::Expr
A fragment of Halide syntax.
Definition Expr.h:258

Halide::FuseLoopLevel
Definition Schedule.h:304

Halide::FuseLoopLevel::align
std::map< std::string, LoopAlignStrategy > align
Contains alignment strategies for the fused dimensions (indexed by the dimension name).
Definition Schedule.h:310

Halide::FuseLoopLevel::FuseLoopLevel
FuseLoopLevel()
Definition Schedule.h:312

Halide::FuseLoopLevel::FuseLoopLevel
FuseLoopLevel(const LoopLevel &level, const std::map< std::string, LoopAlignStrategy > &align)
Definition Schedule.h:315

Halide::FuseLoopLevel::level
LoopLevel level
Definition Schedule.h:305

Halide::Internal::Bound
A bound on a loop, typically from Func::bound.
Definition Schedule.h:492

Halide::Internal::Bound::min
Expr min
Declared min and extent of the loop.
Definition Schedule.h:498

Halide::Internal::Bound::var
std::string var
The loop var being bounded.
Definition Schedule.h:494

Halide::Internal::Bound::modulus
Expr modulus
If defined, the number of iterations will be a multiple of "modulus", and the first iteration will be...
Definition Schedule.h:504

Halide::Internal::Bound::remainder
Expr remainder
Definition Schedule.h:504

Halide::Internal::Bound::extent
Expr extent
Definition Schedule.h:498

Halide::Internal::Dim
The Dim struct represents one loop in the schedule's representation of a loop nest.
Definition Schedule.h:444

Halide::Internal::Dim::var
std::string var
Name of the loop variable.
Definition Schedule.h:446

Halide::Internal::Dim::dim_type
DimType dim_type
The DimType tells us what transformations are legal on this loop (see the DimType enum above).
Definition Schedule.h:456

Halide::Internal::Dim::partition_policy
Partition partition_policy
The strategy for loop partitioning.
Definition Schedule.h:459

Halide::Internal::Dim::is_rvar
bool is_rvar() const
Did this loop originate from an RVar (in which case the bounds of the loops are algorithmically meani...
Definition Schedule.h:470

Halide::Internal::Dim::for_type
ForType for_type
How are the loop values traversed (e.g.
Definition Schedule.h:449

Halide::Internal::Dim::device_api
DeviceAPI device_api
On what device does the body of the loop execute (e.g.
Definition Schedule.h:452

Halide::Internal::Dim::is_parallel
bool is_parallel() const
Could multiple iterations of this loop happen at the same time? Vectorized and GPULanes loop types ar...
Definition Schedule.h:486

Halide::Internal::Dim::is_unordered_parallel
bool is_unordered_parallel() const
Could multiple iterations of this loop happen at the same time, with reads and writes interleaved in ...
Definition Schedule.h:478

Halide::Internal::Dim::is_pure
bool is_pure() const
Can this loop be evaluated in any order (including in parallel)? Equivalently, are there no data haza...
Definition Schedule.h:464

Halide::Internal::FunctionPtr
A possibly-weak pointer to a Halide function.
Definition FunctionPtr.h:27

Halide::Internal::FusedPair
This represents two stages with fused loop nests from outermost to a specific loop level.
Definition Schedule.h:534

Halide::Internal::FusedPair::var_name
std::string var_name
Definition Schedule.h:539

Halide::Internal::FusedPair::FusedPair
FusedPair()=default

Halide::Internal::FusedPair::func_2
std::string func_2
Definition Schedule.h:536

Halide::Internal::FusedPair::stage_2
size_t stage_2
Definition Schedule.h:538

Halide::Internal::FusedPair::operator==
bool operator==(const FusedPair &other) const
Definition Schedule.h:547

Halide::Internal::FusedPair::FusedPair
FusedPair(const std::string &f1, size_t s1, const std::string &f2, size_t s2, const std::string &var)
Definition Schedule.h:542

Halide::Internal::FusedPair::func_1
std::string func_1
Definition Schedule.h:535

Halide::Internal::FusedPair::stage_1
size_t stage_1
Definition Schedule.h:537

Halide::Internal::FusedPair::operator<
bool operator<(const FusedPair &other) const
Definition Schedule.h:552

Halide::Internal::IntrusivePtr
Intrusive shared pointers have a reference count (a RefCount object) stored in the class itself.
Definition IntrusivePtr.h:71

Halide::Internal::Split
Definition Schedule.h:325

Halide::Internal::Split::SplitType
SplitType
Definition Schedule.h:333

Halide::Internal::Split::FuseVars
@ FuseVars
Definition Schedule.h:335

Halide::Internal::Split::RenameVar
@ RenameVar
Definition Schedule.h:334

Halide::Internal::Split::PurifyRVar
@ PurifyRVar
Definition Schedule.h:336

Halide::Internal::Split::SplitVar
@ SplitVar
Definition Schedule.h:333

Halide::Internal::Split::tail
TailStrategy tail
Definition Schedule.h:331

Halide::Internal::Split::old_var
std::string old_var
Definition Schedule.h:326

Halide::Internal::Split::split_type
SplitType split_type
Definition Schedule.h:350

Halide::Internal::Split::exact
bool exact
Definition Schedule.h:328

Halide::Internal::Split::factor
Expr factor
Definition Schedule.h:327

Halide::Internal::Split::inner
std::string inner
Definition Schedule.h:326

Halide::Internal::Split::outer
std::string outer
Definition Schedule.h:326

Halide::Internal::StorageDim
Properties of one axis of the storage of a Func.
Definition Schedule.h:508

Halide::Internal::StorageDim::fold_forward
bool fold_forward
Definition Schedule.h:525

Halide::Internal::StorageDim::var
std::string var
The var in the pure definition corresponding to this axis.
Definition Schedule.h:510

Halide::Internal::StorageDim::alignment
Expr alignment
The bounds allocated (not computed) must be a multiple of "alignment".
Definition Schedule.h:514

Halide::Internal::StorageDim::bound
Expr bound
The bounds allocated (not computed).
Definition Schedule.h:517

Halide::Internal::StorageDim::fold_factor
Expr fold_factor
If the Func is explicitly folded along this axis (with Func::fold_storage) this gives the extent of t...
Definition Schedule.h:524

Halide::VarOrRVar
A class that can represent Vars or RVars.
Definition Func.h:29