1#ifndef HALIDE_CODEGEN_LLVM_H
2#define HALIDE_CODEGEN_LLVM_H
15class IRBuilderDefaultInserter;
17template<
typename,
typename>
49struct ExternSignature;
81 const std::string &suffix,
82 const std::vector<std::pair<std::string, ExternSignature>> &externs);
105 const std::string &extern_name,
const std::vector<LoweredArgument> &args);
106 virtual void end_func(
const std::vector<LoweredArgument> &args);
125 virtual std::string
mabi()
const;
167 std::unique_ptr<llvm::IRBuilder<llvm::ConstantFolder, llvm::IRBuilderDefaultInserter>>
builder;
200 bool must_succeed =
true)
const;
290 llvm::Constant *
create_binary_blob(
const std::vector<char> &data,
const std::string &name,
bool constant =
true);
409 int effective_vscale)
const;
414 bool zero_initialize =
false,
415 const std::string &name =
"");
456 llvm::Function *
get_llvm_intrin(
const Type &ret_type,
const std::string &name,
const std::vector<Type> &arg_types,
bool scalars_are_vectors =
false);
457 llvm::Function *
get_llvm_intrin(llvm::Type *ret_type,
const std::string &name,
const std::vector<llvm::Type *> &arg_types);
459 llvm::Function *
declare_intrin_overload(
const std::string &name,
const Type &ret_type,
const std::string &impl_name, std::vector<Type> arg_types,
bool scalars_are_vectors =
false);
474 const std::string &name, std::vector<Expr>);
476 llvm::Function *intrin, std::vector<Expr>);
478 const std::string &name, std::vector<llvm::Value *>,
479 bool scalable_vector_result =
false,
bool is_reduction =
false);
481 llvm::Function *intrin, std::vector<llvm::Value *>,
482 bool is_reduction =
false);
487 virtual llvm::Value *
slice_vector(llvm::Value *vec,
int start,
int extent);
496 const std::vector<int> &indices);
543 const std::vector<llvm::Value *> &args,
544 llvm::Type *result_type);
554 llvm::Type *desired_type);
619 using MaskVariant = std::variant<NoMask, AllEnabledMask, llvm::Value *>;
668 std::map<std::string, llvm::Constant *> string_constants;
673 llvm::BasicBlock *destructor_block =
nullptr;
679 bool llvm_large_code_model;
684 int effective_vscale = 0;
690 int producer_consumer_id = 0;
698 llvm::Function *embed_metadata_getter(
const std::string &metadata_getter_name,
699 const std::string &function_name,
const std::vector<LoweredArgument> &args,
703 llvm::Constant *embed_constant_expr(
Expr e, llvm::Type *t);
704 llvm::Constant *embed_constant_scalar_value_t(
const Expr &e);
706 llvm::Function *add_argv_wrapper(llvm::Function *fn,
const std::string &name,
707 bool result_in_argv, std::vector<bool> &arg_is_buffer);
709 llvm::Value *codegen_vector_load(
const Type &type,
const std::string &name,
const Expr &base,
711 llvm::Value *vpred =
nullptr,
bool slice_to_native =
true, llvm::Value *stride =
nullptr);
713 virtual void codegen_predicated_load(
const Load *op);
714 virtual void codegen_predicated_store(
const Store *op);
716 void codegen_atomic_rmw(
const Store *op);
718 void init_codegen(
const std::string &name,
bool any_strict_float =
false);
719 std::unique_ptr<llvm::Module> finish_codegen();
722 template<
typename Op>
723 bool try_to_fold_vector_reduce(
const Expr &a,
Expr b);
730 std::map<llvm::Value *, llvm::Type *> struct_type_recovery;
737 llvm::LLVMContext &context);
Defines the base class for things that recursively walk over the IR.
Defines Module, an IR container that fully describes a Halide program.
Defines the Scope class, which is used for keeping track of names in a scope while traversing IR.
Defines the structure that describes a Halide target.
A code generator abstract base class.
llvm::Constant * get_splat(int lanes, llvm::Constant *value, VectorTypeConstraint type_constraint=VectorTypeConstraint::None) const
void visit(const Let *) override
std::pair< llvm::Function *, int > find_vector_runtime_function(const std::string &name, int lanes)
Go looking for a vector version of a runtime function.
llvm::StructType * metadata_t_type
virtual void end_func(const std::vector< LoweredArgument > &args)
llvm::StructType * argument_t_type
llvm::Constant * create_binary_blob(const std::vector< char > &data, const std::string &name, bool constant=true)
Put a binary blob in the module as a global variable and return a pointer to it.
void visit(const Select *) override
virtual int maximum_vector_bits() const
Used to decide whether to break a vector up into multiple smaller operations.
void visit(const Or *) override
void visit(const AssertStmt *) override
bool emit_atomic_stores
Emit atomic store instructions?
void sym_pop(const std::string &name)
Remove an entry from the symbol table, revealing any previous entries with the same name.
virtual void init_context()
Grab all the context specific internal state.
virtual int target_vscale() const
For architectures that have vscale vectors, return the constant vscale to use.
llvm::StructType * device_interface_t_type
void visit(const Mul *) override
llvm::StructType * semaphore_t_type
virtual bool supports_atomic_add(const Type &t) const
virtual llvm::Type * llvm_type_of(const Type &) const
Get the llvm type equivalent to the given halide type in the current context.
std::vector< LoweredArgument > current_function_args
const Target & get_target() const
The target we're generating code for.
void return_with_error_code(llvm::Value *error_code)
Return from the pipeline with the given error code.
llvm::Value * call_intrin(const llvm::Type *t, int intrin_lanes, const std::string &name, std::vector< llvm::Value * >, bool scalable_vector_result=false, bool is_reduction=false)
virtual std::unique_ptr< llvm::Module > compile(const Module &module)
Takes a halide Module and compiles it to an llvm Module.
llvm::Value * codegen_buffer_pointer(const std::string &buffer, Type type, Expr index)
llvm::Function * get_llvm_intrin(llvm::Type *ret_type, const std::string &name, const std::vector< llvm::Type * > &arg_types)
virtual std::string mcpu_tune() const =0
bool try_vector_predication_intrinsic(const std::string &name, VPResultType result_type, int32_t length, MaskVariant mask, std::vector< VPArg > args)
Generate an intrinsic call if use_llvm_vp_intrinsics is true and length is greater than 1.
void trigger_destructor(llvm::Function *destructor_fn, llvm::Value *stack_slot)
Call a destructor early.
void visit(const Reinterpret *) override
void visit(const Min *) override
void visit(const For *) override
llvm::Value * codegen_buffer_pointer(llvm::Value *base_address, Type type, llvm::Value *index)
void visit(const Not *) override
void visit(const Sub *) override
void sym_push(const std::string &name, llvm::Value *value)
Add an entry to the symbol table, hiding previous entries with the same name.
llvm::StructType * type_t_type
VectorTypeConstraint
Interface to abstract vector code generation as LLVM is now providing multiple options to express eve...
@ Fixed
Force use of fixed size vectors. (VectorTypeConstraint::None is the setting that means "use default for current target".)
@ VScale
Force use of scalable (vscale) vectors.
DestructorType
Some destructors should always be called.
Halide::Target target
The target we're generating code for.
virtual void begin_func(LinkageType linkage, const std::string &simple_name, const std::string &extern_name, const std::vector< LoweredArgument > &args)
Helper functions for compiling Halide functions to llvm functions.
bool try_vector_predication_comparison(const std::string &name, const Type &result_type, MaskVariant mask, llvm::Value *a, llvm::Value *b, const char *cmp_op)
Generate a vector predicated comparison intrinsic call if use_llvm_vp_intrinsics is true and result_t...
llvm::StructType * scalar_value_t_type
std::unique_ptr< llvm::IRBuilder< llvm::ConstantFolder, llvm::IRBuilderDefaultInserter > > builder
bool use_llvm_vp_intrinsics
Controls use of vector predicated intrinsics for vector operations.
llvm::Value * create_broadcast(llvm::Value *, int lanes)
Widen an llvm scalar into an llvm vector with the given number of lanes.
std::unique_ptr< llvm::Module > module
virtual std::string mcpu_target() const =0
What should be passed as -mcpu (warning: implies attrs!), -mattrs, and related for compilation.
void visit(const Max *) override
void codegen(const Stmt &)
Emit code that runs a statement.
virtual void compile_buffer(const Buffer<> &buffer)
virtual bool promote_indices() const
Should indexing math be promoted to 64-bit on platforms with 64-bit pointers?
llvm::Function * function
void visit(const GE *) override
void visit(const Variable *) override
void declare_intrin_overload(const std::string &name, const Type &ret_type, llvm::Function *impl, std::vector< Type > arg_types)
llvm::Value * codegen_dense_vector_load(const Load *load, llvm::Value *vpred=nullptr, bool slice_to_native=true)
Generate a basic dense vector load, with an optional predicate and control over whether or not we sho...
void visit(const Atomic *) override
void create_assertion(llvm::Value *condition, const Expr &message, llvm::Value *error_code=nullptr)
Codegen an assertion.
virtual Type upgrade_type_for_storage(const Type &) const
Return the type that a given Halide type should be stored/loaded from memory as.
void visit(const Shuffle *) override
llvm::MDNode * very_likely_branch
void visit(const Allocate *) override=0
Generate code for an allocate node.
void visit(const Prefetch *) override
void visit(const Provide *) override
These IR nodes should have been removed during lowering.
virtual Type upgrade_type_for_arithmetic(const Type &) const
Return the type in which arithmetic should be done for the given storage type.
llvm::Value * normalize_fixed_scalable_vector_type(llvm::Type *desired_type, llvm::Value *result)
Ensure that a vector value is either fixed or vscale, as required to match desired_type.
void visit(const Div *) override
llvm::StructType * halide_buffer_t_type
WarningKind
Warning messages which we want to avoid displaying multiple times.
void visit(const EQ *) override
virtual int native_vector_bits() const =0
What's the natural vector bit-width to use for loads, stores, etc.
llvm::Value * match_vector_type_scalable(llvm::Value *value, llvm::Type *guide)
llvm::StructType * dimension_t_type
std::map< WarningKind, std::string > onetime_warnings
static std::unique_ptr< llvm::Module > compile_trampolines(const Target &target, llvm::LLVMContext &context, const std::string &suffix, const std::vector< std::pair< std::string, ExternSignature > > &externs)
virtual void compile_func(const LoweredFunc &func, const std::string &simple_name, const std::string &extern_name)
Compile a specific halide declaration into the llvm Module.
llvm::FunctionType * signature_to_type(const ExternSignature &signature)
Given a Halide ExternSignature, return the equivalent llvm::FunctionType.
llvm::Constant * create_string_constant(const std::string &str)
Put a string constant in the module as a global variable and return a pointer to it.
virtual llvm::Value * concat_vectors(const std::vector< llvm::Value * > &)
Concatenate a bunch of llvm vectors.
llvm::LLVMContext * context
llvm::Value * create_alloca_at_entry(llvm::Type *type, int n, bool zero_initialize=false, const std::string &name="")
Perform an alloca at the function entrypoint.
llvm::Value * match_vector_type_scalable(llvm::Value *value, llvm::Value *guide)
virtual std::string get_allocation_name(const std::string &n)
Get a unique name for the actual block of memory that an allocate node uses.
virtual bool use_soft_float_abi() const =0
virtual std::string mabi() const
void visit(const Evaluate *) override
std::map< std::string, std::vector< Intrinsic > > intrinsics
Mapping of intrinsic functions to the various overloads implementing it.
std::string mangle_llvm_type(llvm::Type *type)
Return type string for LLVM type using LLVM IR intrinsic type mangling.
void visit(const LE *) override
llvm::Value * codegen_buffer_pointer(const std::string &buffer, Type type, llvm::Value *index)
Generate a pointer into a named buffer at a given index, of a given type.
void visit(const NE *) override
void add_tbaa_metadata(llvm::Instruction *inst, std::string buffer, const Expr &index)
Mark a load or store with type-based-alias-analysis metadata so that llvm knows it can reorder loads ...
void visit(const And *) override
virtual bool use_pic() const
void scalarize(const Expr &)
Codegen a vector Expr by codegenning each lane and combining.
void visit(const StringImm *) override
virtual void codegen_vector_reduce(const VectorReduce *op, const Expr &init)
Compile a horizontal reduction that starts with an explicit initial value.
void codegen_asserts(const std::vector< const AssertStmt * > &asserts)
Codegen a block of asserts with pure conditions.
llvm::Function * get_llvm_intrin(const Type &ret_type, const std::string &name, const std::vector< Type > &arg_types, bool scalars_are_vectors=false)
Get an LLVM intrinsic declaration.
size_t get_requested_alloca_total() const
static std::unique_ptr< CodeGen_LLVM > new_for_target(const Target &target, llvm::LLVMContext &context)
Create an instance of CodeGen_LLVM suitable for the target.
virtual Type upgrade_type_for_argument_passing(const Type &) const
Return the type that a Halide type should be passed in and out of functions as.
void visit(const GT *) override
llvm::Value * fixed_to_scalable_vector_type(llvm::Value *fixed)
Convert an LLVM fixed vector value to the corresponding vscale vector value.
void visit(const Cast *) override
CodeGen_LLVM(const Target &t)
void visit(const Ramp *) override
void visit(const Broadcast *) override
void visit(const Mod *) override
llvm::Value * make_halide_type_t(const Type &)
Turn a Halide Type into an llvm::Value representing a constant halide_type_t.
void visit(const Call *) override
static void initialize_llvm()
Initialize internal llvm state for the enabled targets.
llvm::Value * simple_call_intrin(const std::string &intrin, const std::vector< llvm::Value * > &args, llvm::Type *result_type)
call_intrin does far too much to be useful and generally breaks things when one has carefully set thi...
void set_context(llvm::LLVMContext &context)
Tell the code generator which LLVM context to use.
void visit(const Store *) override
int get_vector_num_elements(const llvm::Type *t)
Get number of vector elements, taking into account scalable vectors.
llvm::Value * call_intrin(const Type &t, int intrin_lanes, llvm::Function *intrin, std::vector< Expr >)
llvm::Value * register_destructor(llvm::Function *destructor_fn, llvm::Value *obj, DestructorType when)
llvm::Value * codegen(const Expr &)
Emit code that evaluates an expression, and return the llvm representation of the result of the expre...
void optimize_module()
Run all of llvm's optimization passes on the module.
llvm::Value * codegen_buffer_pointer(llvm::Value *base_address, Type type, Expr index)
llvm::Value * sym_get(const std::string &name, bool must_succeed=true) const
Fetch an entry from the symbol table.
void visit(const ProducerConsumer *) override
virtual llvm::Value * shuffle_vectors(llvm::Value *a, llvm::Value *b, const std::vector< int > &indices)
Create an LLVM shuffle vectors instruction.
void visit(const LT *) override
void visit(const Load *) override
std::variant< NoMask, AllEnabledMask, llvm::Value * > MaskVariant
Predication mask using the above two types for special cases and an llvm::Value for the general one.
llvm::Value * convert_fixed_or_scalable_vector_type(llvm::Value *arg, llvm::Type *desired_type)
Convert between two LLVM vectors of potentially different scalable/fixed and size.
virtual llvm::Value * slice_vector(llvm::Value *vec, int start, int extent)
Take a slice of lanes out of an llvm vector.
llvm::Function * declare_intrin_overload(const std::string &name, const Type &ret_type, const std::string &impl_name, std::vector< Type > arg_types, bool scalars_are_vectors=false)
Declare an intrinsic function that participates in overload resolution.
bool inside_atomic_mutex_node
Are we inside an atomic node that uses mutex locks? This is used for detecting deadlocks from nested ...
void visit(const FloatImm *) override
void visit(const IntImm *) override
Generate code for various IR nodes.
void function_does_not_access_memory(llvm::Function *fn)
Add the appropriate function attribute to tell LLVM that the function doesn't access memory.
llvm::MDNode * strict_fp_math_md
llvm::Value * call_intrin(const Type &t, int intrin_lanes, const std::string &name, std::vector< Expr >)
Generate a call to a vector intrinsic or runtime inlined function.
llvm::BasicBlock * get_destructor_block()
Retrieves the block containing the error handling code.
virtual void init_module()
Initialize the CodeGen_LLVM internal state to compile a fresh module.
llvm::Value * get_user_context() const
The user_context argument.
virtual std::string mattrs() const =0
void visit(const Realize *) override
void visit(const VectorReduce *) override
void visit(const IfThenElse *) override
size_t requested_alloca_total
A (very) conservative guess at the size of all alloca() storage requested (including alignment paddin...
Expr wild_u1x_
Some wildcard variables used for peephole optimizations in subclasses.
llvm::StructType * pseudostack_slot_t_type
llvm::Type * llvm_type_of(llvm::LLVMContext *context, Halide::Type t, int effective_vscale) const
Get the llvm type equivalent to a given halide type.
llvm::Value * scalable_to_fixed_vector_type(llvm::Value *scalable)
Convert an LLVM vscale vector value to the corresponding fixed vector value.
llvm::MDNode * default_fp_math_md
llvm::Value * call_intrin(const llvm::Type *t, int intrin_lanes, llvm::Function *intrin, std::vector< llvm::Value * >, bool is_reduction=false)
bool sym_exists(const std::string &name) const
Test if an item exists in the symbol table.
llvm::Value * match_vector_type_scalable(llvm::Value *value, VectorTypeConstraint constraint)
Make sure a value type has the same scalable/fixed vector type as a guide.
void visit(const Free *) override=0
Generate code for a free node.
llvm::Value * shuffle_vectors(llvm::Value *v, const std::vector< int > &indices)
Shorthand for shuffling a single vector.
void visit(const Add *) override
void visit(const Block *) override
virtual bool supports_call_as_float16(const Call *op) const
Can we call this operation with float16 type? This is used to avoid "emulated" equivalent code-gen in...
void visit(const UIntImm *) override
void visit(const LetStmt *) override
llvm::Type * void_t
Some useful llvm types.
llvm::Value * call_overloaded_intrin(const Type &result_type, const std::string &name, const std::vector< Expr > &args)
Call an overloaded intrinsic function.
virtual llvm::Value * interleave_vectors(const std::vector< llvm::Value * > &)
Implementation of the intrinsic call to interleave_vectors.
llvm::Type * get_vector_type(llvm::Type *, int n, VectorTypeConstraint type_constraint=VectorTypeConstraint::None) const
A base class for algorithms that need to recursively walk over the IR.
virtual void visit(const IntImm *)
A common pattern when traversing Halide IR is that you need to keep track of stuff when you find a Le...
A reference-counted handle to a parameter to a halide pipeline.
This file defines the class FunctionDAG, which is our representation of a Halide pipeline,...
LinkageType
Type of linkage a function in a lowered Halide module can have.
std::unique_ptr< llvm::Module > codegen_llvm(const Module &module, llvm::LLVMContext &context)
Given a Halide module, generate an llvm::Module.
std::map< std::string, std::string > MetadataNameMap
signed __INT32_TYPE__ int32_t
A fragment of Halide syntax.
The sum of two expressions.
Allocate a scratch area called with the given name, type, and size.
Logical and - are both expressions true.
If the 'condition' is false, then evaluate and return the message, which should be a call to an error...
Lock all the Store nodes in the body statement.
A sequence of statements to be executed in-order.
A vector with 'lanes' elements, in which every element is 'value'.
The actual IR nodes begin here.
Type indicating mask to use is all true – all lanes enabled.
Description of an intrinsic function overload.
Intrinsic(Type result_type, std::vector< Type > arg_types, llvm::Function *impl)
std::vector< Type > arg_types
Type indicating an intrinsic does not take a mask.
Support for generating LLVM vector predication intrinsics ("@llvm.vp.*" and "@llvm....
std::optional< size_t > mangle_index
VPArg(llvm::Value *value, std::optional< size_t > mangle_index=std::nullopt, int32_t alignment=0)
VPResultType(llvm::Type *type, std::optional< size_t > mangle_index=std::nullopt)
std::optional< size_t > mangle_index
The ratio of two expressions.
Is the first expression equal to the second.
Evaluate and discard an expression, presumably because it has some side-effect.
Floating point constants.
Free the resources associated with the given buffer.
Is the first expression greater than or equal to the second.
Is the first expression greater than the second.
Is the first expression less than or equal to the second.
Is the first expression less than the second.
A let expression, like you might find in a functional language.
The statement form of a let node.
Load a value from a named symbol if predicate is true.
Definition of a lowered function.
The greater of two values.
The lesser of two values.
The result of modulus_remainder analysis.
The product of two expressions.
Is the first expression not equal to the second.
Logical not - true if the expression is false.
Logical or - is at least one of the expressions true.
Represent a multi-dimensional region of a Func or an ImageParam that needs to be prefetched.
This node is a helpful annotation to do with permissions.
This defines the value of a function at a multi-dimensional location.
A linear ramp vector node.
Allocate a multi-dimensional buffer of the given type and size.
Reinterpret value as another type, without affecting any of the bits (on little-endian systems).
Construct a new vector by taking elements from another sequence of vectors.
A reference-counted handle to a statement node.
Store a 'value' to the buffer called 'name' at a given 'index' if 'predicate' is true.
The difference of two expressions.
Unsigned integer constants.
Horizontally reduce a vector to a scalar or narrower vector using the given commutative and associati...
A struct representing a target machine and os to generate code for.
Types in the halide type system.