Halide
Halide::Internal::CodeGen_PTX_Dev Class Reference

A code generator that emits GPU code from a given Halide stmt. More...

#include <CodeGen_PTX_Dev.h>

Inherits Halide::Internal::CodeGen_LLVM, and Halide::Internal::CodeGen_GPU_Dev.

Public Member Functions

 CodeGen_PTX_Dev (Target host)
 Create a PTX device code generator. More...
 
 ~CodeGen_PTX_Dev () override
 
void add_kernel (Stmt stmt, const std::string &name, const std::vector< DeviceArgument > &args) override
 Compile a GPU kernel into the module. More...
 
std::vector< char > compile_to_src () override
 
std::string get_current_kernel_name () override
 
void dump () override
 
std::string print_gpu_name (const std::string &name) override
 Returns the specified name transformed by the variable naming rules for the GPU language backend. More...
 
std::string api_unique_name () override
 This routine returns the GPU API name that is combined into runtime routine names to ensure each GPU API has a unique name. More...
 
- Public Member Functions inherited from Halide::Internal::CodeGen_LLVM
 ~CodeGen_LLVM () override
 
virtual std::unique_ptr< llvm::Module > compile (const Module &module)
 Takes a halide Module and compiles it to an llvm Module. More...
 
const Target & get_target () const
 The target we're generating code for. More...
 
void set_context (llvm::LLVMContext &context)
 Tell the code generator which LLVM context to use. More...
 
size_t get_requested_alloca_total () const
 
- Public Member Functions inherited from Halide::Internal::IRVisitor
 IRVisitor ()
 
virtual ~IRVisitor ()
 
- Public Member Functions inherited from Halide::Internal::CodeGen_GPU_Dev
virtual ~CodeGen_GPU_Dev ()
 
virtual bool kernel_run_takes_types () const
 Allows the GPU device specific code to request halide_type_t values to be passed to the kernel_run routine rather than just argument type sizes. More...
 

Static Public Member Functions

static void test ()
 
- Static Public Member Functions inherited from Halide::Internal::CodeGen_LLVM
static CodeGen_LLVM * new_for_target (const Target &target, llvm::LLVMContext &context)
 Create an instance of CodeGen_LLVM suitable for the target. More...
 
static void initialize_llvm ()
 Initialize internal llvm state for the enabled targets. More...
 
static std::unique_ptr< llvm::Module > compile_trampolines (const Target &target, llvm::LLVMContext &context, const std::string &suffix, const std::vector< std::pair< std::string, ExternSignature >> &externs)
 
- Static Public Member Functions inherited from Halide::Internal::CodeGen_GPU_Dev
static bool is_gpu_var (const std::string &name)
 
static bool is_gpu_block_var (const std::string &name)
 
static bool is_gpu_thread_var (const std::string &name)
 
static bool is_block_uniform (const Expr &expr)
 Checks if expr is block uniform, i.e. More...
 
static bool is_buffer_constant (const Stmt &kernel, const std::string &buffer)
 Checks if the buffer is a candidate for constant storage. More...
 

Protected Member Functions

void init_module () override
 (Re)initialize the PTX module. More...
 
void visit (const Call *) override
 Nodes for which we need to override default behavior for the GPU runtime. More...
 
void visit (const For *) override
 
void visit (const Allocate *) override
 Generate code for an allocate node. More...
 
void visit (const Free *) override
 Generate code for a free node. More...
 
void visit (const AssertStmt *) override
 
void visit (const Load *) override
 
void visit (const Store *) override
 
void visit (const Atomic *) override
 
void codegen_vector_reduce (const VectorReduce *op, const Expr &init) override
 Compile a horizontal reduction that starts with an explicit initial value. More...
 
std::string march () const
 
std::string mcpu () const override
 What should be passed as -mcpu, -mattrs, and related for compilation. More...
 
std::string mattrs () const override
 
bool use_soft_float_abi () const override
 
int native_vector_bits () const override
 What's the natural vector bit-width to use for loads, stores, etc. More...
 
bool promote_indices () const override
 Should indexing math be promoted to 64-bit on platforms with 64-bit pointers? More...
 
Type upgrade_type_for_arithmetic (const Type &t) const override
 Return the type in which arithmetic should be done for the given storage type. More...
 
Type upgrade_type_for_storage (const Type &t) const override
 Return the type that a given Halide type should be stored/loaded from memory as. More...
 
std::string simt_intrinsic (const std::string &name)
 Map from simt variable names (e.g. More...
 
bool supports_atomic_add (const Type &t) const override
 
void visit (const IntImm *) override
 Generate code for various IR nodes. More...
 
void visit (const UIntImm *) override
 
void visit (const FloatImm *) override
 
void visit (const StringImm *) override
 
void visit (const Cast *) override
 
void visit (const Variable *) override
 
void visit (const Add *) override
 
void visit (const Sub *) override
 
void visit (const Mul *) override
 
void visit (const Div *) override
 
void visit (const Mod *) override
 
void visit (const Min *) override
 
void visit (const Max *) override
 
void visit (const EQ *) override
 
void visit (const NE *) override
 
void visit (const LT *) override
 
void visit (const LE *) override
 
void visit (const GT *) override
 
void visit (const GE *) override
 
void visit (const And *) override
 
void visit (const Or *) override
 
void visit (const Not *) override
 
void visit (const Select *) override
 
void visit (const Load *) override
 
void visit (const Ramp *) override
 
void visit (const Broadcast *) override
 
void visit (const Call *) override
 
void visit (const Let *) override
 
void visit (const LetStmt *) override
 
void visit (const AssertStmt *) override
 
void visit (const ProducerConsumer *) override
 
void visit (const For *) override
 
void visit (const Acquire *) override
 
void visit (const Store *) override
 
void visit (const Block *) override
 
void visit (const Fork *) override
 
void visit (const IfThenElse *) override
 
void visit (const Evaluate *) override
 
void visit (const Shuffle *) override
 
void visit (const VectorReduce *) override
 
void visit (const Prefetch *) override
 
void visit (const Atomic *) override
 
void visit (const Allocate *) override=0
 Generate code for an allocate node. More...
 
void visit (const Free *) override=0
 Generate code for a free node. More...
 
void visit (const Provide *) override
 These IR nodes should have been removed during lowering. More...
 
void visit (const Realize *) override
 
virtual void visit (const IntImm *)
 
virtual void visit (const UIntImm *)
 
virtual void visit (const FloatImm *)
 
virtual void visit (const StringImm *)
 
virtual void visit (const Cast *)
 
virtual void visit (const Variable *)
 
virtual void visit (const Add *)
 
virtual void visit (const Sub *)
 
virtual void visit (const Mul *)
 
virtual void visit (const Div *)
 
virtual void visit (const Mod *)
 
virtual void visit (const Min *)
 
virtual void visit (const Max *)
 
virtual void visit (const EQ *)
 
virtual void visit (const NE *)
 
virtual void visit (const LT *)
 
virtual void visit (const LE *)
 
virtual void visit (const GT *)
 
virtual void visit (const GE *)
 
virtual void visit (const And *)
 
virtual void visit (const Or *)
 
virtual void visit (const Not *)
 
virtual void visit (const Select *)
 
virtual void visit (const Load *)
 
virtual void visit (const Ramp *)
 
virtual void visit (const Broadcast *)
 
virtual void visit (const Call *)
 
virtual void visit (const Let *)
 
virtual void visit (const LetStmt *)
 
virtual void visit (const AssertStmt *)
 
virtual void visit (const ProducerConsumer *)
 
virtual void visit (const For *)
 
virtual void visit (const Store *)
 
virtual void visit (const Provide *)
 
virtual void visit (const Allocate *)
 
virtual void visit (const Free *)
 
virtual void visit (const Realize *)
 
virtual void visit (const Block *)
 
virtual void visit (const IfThenElse *)
 
virtual void visit (const Evaluate *)
 
virtual void visit (const Shuffle *)
 
virtual void visit (const VectorReduce *)
 
virtual void visit (const Prefetch *)
 
virtual void visit (const Fork *)
 
virtual void visit (const Acquire *)
 
virtual void visit (const Atomic *)
 
- Protected Member Functions inherited from Halide::Internal::CodeGen_LLVM
 CodeGen_LLVM (Target t)
 
virtual void compile_func (const LoweredFunc &func, const std::string &simple_name, const std::string &extern_name)
 Compile a specific halide declaration into the llvm Module. More...
 
virtual void compile_buffer (const Buffer<> &buffer)
 
virtual void begin_func (LinkageType linkage, const std::string &simple_name, const std::string &extern_name, const std::vector< LoweredArgument > &args)
 Helper functions for compiling Halide functions to llvm functions. More...
 
virtual void end_func (const std::vector< LoweredArgument > &args)
 
virtual bool use_pic () const
 
virtual Type upgrade_type_for_argument_passing (const Type &) const
 Return the type that a Halide type should be passed in and out of functions as. More...
 
virtual void init_context ()
 Grab all the context specific internal state. More...
 
void add_external_code (const Module &halide_module)
 Add external_code entries to llvm module. More...
 
void optimize_module ()
 Run all of llvm's optimization passes on the module. More...
 
void sym_push (const std::string &name, llvm::Value *value)
 Add an entry to the symbol table, hiding previous entries with the same name. More...
 
void sym_pop (const std::string &name)
 Remove an entry from the symbol table, revealing any previous entries with the same name. More...
 
llvm::Value * sym_get (const std::string &name, bool must_succeed=true) const
 Fetch an entry from the symbol table. More...
 
bool sym_exists (const std::string &name) const
 Test if an item exists in the symbol table. More...
 
llvm::FunctionType * signature_to_type (const ExternSignature &signature)
 Given a Halide ExternSignature, return the equivalent llvm::FunctionType. More...
 
llvm::Value * codegen (const Expr &)
 Emit code that evaluates an expression, and return the llvm representation of the result of the expression. More...
 
void codegen (const Stmt &)
 Emit code that runs a statement. More...
 
void scalarize (const Expr &)
 Codegen a vector Expr by codegenning each lane and combining. More...
 
llvm::Value * register_destructor (llvm::Function *destructor_fn, llvm::Value *obj, DestructorType when)
 
void trigger_destructor (llvm::Function *destructor_fn, llvm::Value *stack_slot)
 Call a destructor early. More...
 
llvm::BasicBlock * get_destructor_block ()
 Retrieves the block containing the error handling code. More...
 
void create_assertion (llvm::Value *condition, const Expr &message, llvm::Value *error_code=nullptr)
 Codegen an assertion. More...
 
void codegen_asserts (const std::vector< const AssertStmt * > &asserts)
 Codegen a block of asserts with pure conditions. More...
 
void get_parallel_tasks (const Stmt &s, std::vector< ParallelTask > &tasks, std::pair< std::string, int > prefix)
 
void do_parallel_tasks (const std::vector< ParallelTask > &tasks)
 
void do_as_parallel_task (const Stmt &s)
 
void return_with_error_code (llvm::Value *error_code)
 Return from the pipeline with the given error code. More...
 
llvm::Constant * create_string_constant (const std::string &str)
 Put a string constant in the module as a global variable and return a pointer to it. More...
 
llvm::Constant * create_binary_blob (const std::vector< char > &data, const std::string &name, bool constant=true)
 Put a binary blob in the module as a global variable and return a pointer to it. More...
 
llvm::Value * create_broadcast (llvm::Value *, int lanes)
 Widen an llvm scalar into an llvm vector with the given number of lanes. More...
 
llvm::Value * codegen_buffer_pointer (const std::string &buffer, Type type, llvm::Value *index)
 Generate a pointer into a named buffer at a given index, of a given type. More...
 
llvm::Value * codegen_buffer_pointer (const std::string &buffer, Type type, Expr index)
 
llvm::Value * codegen_buffer_pointer (llvm::Value *base_address, Type type, Expr index)
 
llvm::Value * codegen_buffer_pointer (llvm::Value *base_address, Type type, llvm::Value *index)
 
llvm::Value * make_halide_type_t (const Type &)
 Turn a Halide Type into an llvm::Value representing a constant halide_type_t. More...
 
void add_tbaa_metadata (llvm::Instruction *inst, std::string buffer, const Expr &index)
 Mark a load or store with type-based-alias-analysis metadata so that llvm knows it can reorder loads and stores across different buffers. More...
 
virtual std::string get_allocation_name (const std::string &n)
 Get a unique name for the actual block of memory that an allocate node uses. More...
 
void visit (const IntImm *) override
 Generate code for various IR nodes. More...
 
void visit (const UIntImm *) override
 
void visit (const FloatImm *) override
 
void visit (const StringImm *) override
 
void visit (const Cast *) override
 
void visit (const Variable *) override
 
void visit (const Add *) override
 
void visit (const Sub *) override
 
void visit (const Mul *) override
 
void visit (const Div *) override
 
void visit (const Mod *) override
 
void visit (const Min *) override
 
void visit (const Max *) override
 
void visit (const EQ *) override
 
void visit (const NE *) override
 
void visit (const LT *) override
 
void visit (const LE *) override
 
void visit (const GT *) override
 
void visit (const GE *) override
 
void visit (const And *) override
 
void visit (const Or *) override
 
void visit (const Not *) override
 
void visit (const Select *) override
 
void visit (const Ramp *) override
 
void visit (const Broadcast *) override
 
void visit (const Let *) override
 
void visit (const LetStmt *) override
 
void visit (const ProducerConsumer *) override
 
void visit (const Acquire *) override
 
void visit (const Block *) override
 
void visit (const Fork *) override
 
void visit (const IfThenElse *) override
 
void visit (const Evaluate *) override
 
void visit (const Shuffle *) override
 
void visit (const VectorReduce *) override
 
void visit (const Prefetch *) override
 
void visit (const Provide *) override
 These IR nodes should have been removed during lowering. More...
 
void visit (const Realize *) override
 
virtual void prepare_for_early_exit ()
 If we have to bail out of a pipeline midway, this should inject the appropriate target-specific cleanup code. More...
 
virtual llvm::Type * llvm_type_of (const Type &) const
 Get the llvm type equivalent to the given halide type in the current context. More...
 
llvm::Value * create_alloca_at_entry (llvm::Type *type, int n, bool zero_initialize=false, const std::string &name="")
 Perform an alloca at the function entrypoint. More...
 
llvm::Value * get_user_context () const
 The user_context argument. More...
 
virtual llvm::Value * interleave_vectors (const std::vector< llvm::Value * > &)
 Implementation of the intrinsic call to interleave_vectors. More...
 
llvm::Value * call_intrin (const Type &t, int intrin_lanes, const std::string &name, std::vector< Expr >)
 Generate a call to a vector intrinsic or runtime inlined function. More...
 
llvm::Value * call_intrin (llvm::Type *t, int intrin_lanes, const std::string &name, std::vector< llvm::Value * >)
 
virtual llvm::Value * slice_vector (llvm::Value *vec, int start, int extent)
 Take a slice of lanes out of an llvm vector. More...
 
virtual llvm::Value * concat_vectors (const std::vector< llvm::Value * > &)
 Concatenate a bunch of llvm vectors. More...
 
virtual llvm::Value * shuffle_vectors (llvm::Value *a, llvm::Value *b, const std::vector< int > &indices)
 Create an LLVM shuffle vectors instruction. More...
 
llvm::Value * shuffle_vectors (llvm::Value *v, const std::vector< int > &indices)
 Shorthand for shuffling a vector with an undef vector. More...
 
std::pair< llvm::Function *, int > find_vector_runtime_function (const std::string &name, int lanes)
 Go looking for a vector version of a runtime function. More...
 
virtual void visit (const IntImm *)
 
virtual void visit (const UIntImm *)
 
virtual void visit (const FloatImm *)
 
virtual void visit (const StringImm *)
 
virtual void visit (const Cast *)
 
virtual void visit (const Variable *)
 
virtual void visit (const Add *)
 
virtual void visit (const Sub *)
 
virtual void visit (const Mul *)
 
virtual void visit (const Div *)
 
virtual void visit (const Mod *)
 
virtual void visit (const Min *)
 
virtual void visit (const Max *)
 
virtual void visit (const EQ *)
 
virtual void visit (const NE *)
 
virtual void visit (const LT *)
 
virtual void visit (const LE *)
 
virtual void visit (const GT *)
 
virtual void visit (const GE *)
 
virtual void visit (const And *)
 
virtual void visit (const Or *)
 
virtual void visit (const Not *)
 
virtual void visit (const Select *)
 
virtual void visit (const Load *)
 
virtual void visit (const Ramp *)
 
virtual void visit (const Broadcast *)
 
virtual void visit (const Call *)
 
virtual void visit (const Let *)
 
virtual void visit (const LetStmt *)
 
virtual void visit (const AssertStmt *)
 
virtual void visit (const ProducerConsumer *)
 
virtual void visit (const For *)
 
virtual void visit (const Store *)
 
virtual void visit (const Provide *)
 
virtual void visit (const Allocate *)
 
virtual void visit (const Free *)
 
virtual void visit (const Realize *)
 
virtual void visit (const Block *)
 
virtual void visit (const IfThenElse *)
 
virtual void visit (const Evaluate *)
 
virtual void visit (const Shuffle *)
 
virtual void visit (const VectorReduce *)
 
virtual void visit (const Prefetch *)
 
virtual void visit (const Fork *)
 
virtual void visit (const Acquire *)
 
virtual void visit (const Atomic *)
 

Protected Attributes

llvm::BasicBlock * entry_block
 We hold onto the basic block at the start of the device function in order to inject allocas. More...
 
- Protected Attributes inherited from Halide::Internal::CodeGen_LLVM
std::unique_ptr< llvm::Module > module
 
llvm::Function * function
 
llvm::LLVMContext * context
 
llvm::IRBuilder< llvm::ConstantFolder, llvm::IRBuilderDefaultInserter > * builder
 
llvm::Value * value
 
llvm::MDNode * very_likely_branch
 
llvm::MDNode * default_fp_math_md
 
llvm::MDNode * strict_fp_math_md
 
std::vector< LoweredArgument > current_function_args
 
Halide::Target target
 The target we're generating code for. More...
 
llvm::Type * void_t
 Some useful llvm types. More...
 
llvm::Type * i1_t
 
llvm::Type * i8_t
 
llvm::Type * i16_t
 
llvm::Type * i32_t
 
llvm::Type * i64_t
 
llvm::Type * f16_t
 
llvm::Type * f32_t
 
llvm::Type * f64_t
 
llvm::StructType * halide_buffer_t_type
 
llvm::StructType * type_t_type
 
llvm::StructType * dimension_t_type
 
llvm::StructType * metadata_t_type
 
llvm::StructType * argument_t_type
 
llvm::StructType * scalar_value_t_type
 
llvm::StructType * device_interface_t_type
 
llvm::StructType * pseudostack_slot_t_type
 
llvm::StructType * semaphore_t_type
 
llvm::StructType * semaphore_acquire_t_type
 
llvm::StructType * parallel_task_t_type
 
llvm::Type * i8x8
 Some useful llvm types for subclasses. More...
 
llvm::Type * i8x16
 
llvm::Type * i8x32
 
llvm::Type * i16x4
 
llvm::Type * i16x8
 
llvm::Type * i16x16
 
llvm::Type * i32x2
 
llvm::Type * i32x4
 
llvm::Type * i32x8
 
llvm::Type * i64x2
 
llvm::Type * i64x4
 
llvm::Type * f32x2
 
llvm::Type * f32x4
 
llvm::Type * f32x8
 
llvm::Type * f64x2
 
llvm::Type * f64x4
 
Expr wild_i8x8
 Some wildcard variables used for peephole optimizations in subclasses. More...
 
Expr wild_i16x4
 
Expr wild_i32x2
 
Expr wild_u8x8
 
Expr wild_u16x4
 
Expr wild_u32x2
 
Expr wild_i8x16
 
Expr wild_i16x8
 
Expr wild_i32x4
 
Expr wild_i64x2
 
Expr wild_u8x16
 
Expr wild_u16x8
 
Expr wild_u32x4
 
Expr wild_u64x2
 
Expr wild_i8x32
 
Expr wild_i16x16
 
Expr wild_i32x8
 
Expr wild_i64x4
 
Expr wild_u8x32
 
Expr wild_u16x16
 
Expr wild_u32x8
 
Expr wild_u64x4
 
Expr wild_f32x2
 
Expr wild_f32x4
 
Expr wild_f64x2
 
Expr wild_f32x8
 
Expr wild_f64x4
 
Expr wild_u1x_
 
Expr wild_i8x_
 
Expr wild_u8x_
 
Expr wild_i16x_
 
Expr wild_u16x_
 
Expr wild_i32x_
 
Expr wild_u32x_
 
Expr wild_i64x_
 
Expr wild_u64x_
 
Expr wild_f32x_
 
Expr wild_f64x_
 
Expr min_i8
 
Expr max_i8
 
Expr max_u8
 
Expr min_i16
 
Expr max_i16
 
Expr max_u16
 
Expr min_i32
 
Expr max_i32
 
Expr max_u32
 
Expr min_i64
 
Expr max_i64
 
Expr max_u64
 
Expr min_f32
 
Expr max_f32
 
Expr min_f64
 
Expr max_f64
 
int task_depth
 
size_t requested_alloca_total = 0
 A (very) conservative guess at the size of all alloca() storage requested (including alignment padding). More...
 
std::set< std::string > external_buffer
 Which buffers came in from the outside world (and so we can't guarantee their alignment) More...
 
bool inside_atomic_mutex_node
 Are we inside an atomic node that uses mutex locks? This is used for detecting deadlocks from nested atomics & illegal vectorization. More...
 
bool emit_atomic_stores
 Emit atomic store instructions? More...
 

Additional Inherited Members

- Public Types inherited from Halide::Internal::CodeGen_GPU_Dev
enum  MemoryFenceType { None = 0, Device = 1, Shared = 2 }
 A mask describing which type of memory fence to use for the gpu_thread_barrier() intrinsic. More...
 
- Protected Types inherited from Halide::Internal::CodeGen_LLVM
enum  DestructorType { Always, OnError, OnSuccess }
 Some destructors should always be called. More...
 
- Static Protected Attributes inherited from Halide::Internal::CodeGen_LLVM
static bool llvm_X86_enabled
 State needed by llvm for code generation, including the current module, function, context, builder, and most recently generated llvm value. More...
 
static bool llvm_ARM_enabled
 
static bool llvm_Hexagon_enabled
 
static bool llvm_AArch64_enabled
 
static bool llvm_NVPTX_enabled
 
static bool llvm_Mips_enabled
 
static bool llvm_PowerPC_enabled
 
static bool llvm_AMDGPU_enabled
 
static bool llvm_WebAssembly_enabled
 
static bool llvm_RISCV_enabled
 

Detailed Description

A code generator that emits GPU code from a given Halide stmt.

Definition at line 20 of file CodeGen_PTX_Dev.h.

Constructor & Destructor Documentation

◆ CodeGen_PTX_Dev()

Halide::Internal::CodeGen_PTX_Dev::CodeGen_PTX_Dev ( Target  host)

Create a PTX device code generator.

◆ ~CodeGen_PTX_Dev()

Halide::Internal::CodeGen_PTX_Dev::~CodeGen_PTX_Dev ( )
override

Member Function Documentation

◆ add_kernel()

void Halide::Internal::CodeGen_PTX_Dev::add_kernel ( Stmt  stmt,
const std::string &  name,
const std::vector< DeviceArgument > &  args 
)
overridevirtual

Compile a GPU kernel into the module.

This may be called many times with different kernels, which will all be accumulated into a single source module shared by a given Halide pipeline.

Implements Halide::Internal::CodeGen_GPU_Dev.

◆ test()

static void Halide::Internal::CodeGen_PTX_Dev::test ( )
static

◆ compile_to_src()

std::vector<char> Halide::Internal::CodeGen_PTX_Dev::compile_to_src ( )
overridevirtual

◆ get_current_kernel_name()

std::string Halide::Internal::CodeGen_PTX_Dev::get_current_kernel_name ( )
overridevirtual

◆ dump()

void Halide::Internal::CodeGen_PTX_Dev::dump ( )
overridevirtual

◆ print_gpu_name()

std::string Halide::Internal::CodeGen_PTX_Dev::print_gpu_name ( const std::string &  name)
overridevirtual

Returns the specified name transformed by the variable naming rules for the GPU language backend.

Used to determine the name of a parameter during host codegen.

Implements Halide::Internal::CodeGen_GPU_Dev.

◆ api_unique_name()

std::string Halide::Internal::CodeGen_PTX_Dev::api_unique_name ( )
inlineoverridevirtual

This routine returns the GPU API name that is combined into runtime routine names to ensure each GPU API has a unique name.

Implements Halide::Internal::CodeGen_GPU_Dev.

Definition at line 42 of file CodeGen_PTX_Dev.h.

◆ init_module()

void Halide::Internal::CodeGen_PTX_Dev::init_module ( )
overrideprotectedvirtual

(Re)initialize the PTX module.

This is separate from compile, since a PTX device module will often have many kernels compiled into it for a single pipeline.

Implements Halide::Internal::CodeGen_GPU_Dev.

◆ visit() [1/100]

void Halide::Internal::CodeGen_PTX_Dev::visit ( const Call * )
overrideprotectedvirtual

Nodes for which we need to override default behavior for the GPU runtime.

Reimplemented from Halide::Internal::CodeGen_LLVM.

◆ visit() [2/100]

void Halide::Internal::CodeGen_PTX_Dev::visit ( const For * )
overrideprotectedvirtual

Reimplemented from Halide::Internal::CodeGen_LLVM.

◆ visit() [3/100]

void Halide::Internal::CodeGen_PTX_Dev::visit ( const Allocate * )
overrideprotectedvirtual

Generate code for an allocate node.

It has no default implementation - it must be handled in an architecture-specific way.

Implements Halide::Internal::CodeGen_LLVM.

◆ visit() [4/100]

void Halide::Internal::CodeGen_PTX_Dev::visit ( const Free * )
overrideprotectedvirtual

Generate code for a free node.

It has no default implementation and must be handled in an architecture-specific way.

Implements Halide::Internal::CodeGen_LLVM.

◆ visit() [5/100]

void Halide::Internal::CodeGen_PTX_Dev::visit ( const AssertStmt * )
overrideprotectedvirtual

Reimplemented from Halide::Internal::CodeGen_LLVM.

◆ visit() [6/100]

void Halide::Internal::CodeGen_PTX_Dev::visit ( const Load * )
overrideprotectedvirtual

Reimplemented from Halide::Internal::CodeGen_LLVM.

◆ visit() [7/100]

void Halide::Internal::CodeGen_PTX_Dev::visit ( const Store * )
overrideprotectedvirtual

Reimplemented from Halide::Internal::CodeGen_LLVM.

◆ visit() [8/100]

void Halide::Internal::CodeGen_PTX_Dev::visit ( const Atomic * )
overrideprotectedvirtual

Reimplemented from Halide::Internal::CodeGen_LLVM.

◆ codegen_vector_reduce()

void Halide::Internal::CodeGen_PTX_Dev::codegen_vector_reduce ( const VectorReduce *  op,
const Expr &  init 
)
overrideprotectedvirtual

Compile a horizontal reduction that starts with an explicit initial value.

There are lots of complex ways to peephole optimize this pattern, especially with the proliferation of dot-product instructions, and they can usefully share logic across backends.

Reimplemented from Halide::Internal::CodeGen_LLVM.

◆ march()

std::string Halide::Internal::CodeGen_PTX_Dev::march ( ) const
protected

◆ mcpu()

std::string Halide::Internal::CodeGen_PTX_Dev::mcpu ( ) const
overrideprotectedvirtual

What should be passed as -mcpu, -mattrs, and related for compilation.

The architecture-specific code generator should define these.

Implements Halide::Internal::CodeGen_LLVM.

◆ mattrs()

std::string Halide::Internal::CodeGen_PTX_Dev::mattrs ( ) const
overrideprotectedvirtual

◆ use_soft_float_abi()

bool Halide::Internal::CodeGen_PTX_Dev::use_soft_float_abi ( ) const
overrideprotectedvirtual

◆ native_vector_bits()

int Halide::Internal::CodeGen_PTX_Dev::native_vector_bits ( ) const
overrideprotectedvirtual

What's the natural vector bit-width to use for loads, stores, etc.

Implements Halide::Internal::CodeGen_LLVM.

◆ promote_indices()

bool Halide::Internal::CodeGen_PTX_Dev::promote_indices ( ) const
inlineoverrideprotectedvirtual

Should indexing math be promoted to 64-bit on platforms with 64-bit pointers?

Reimplemented from Halide::Internal::CodeGen_LLVM.

Definition at line 76 of file CodeGen_PTX_Dev.h.

◆ upgrade_type_for_arithmetic()

Type Halide::Internal::CodeGen_PTX_Dev::upgrade_type_for_arithmetic ( const Type &  t) const
inlineoverrideprotectedvirtual

Return the type in which arithmetic should be done for the given storage type.

Reimplemented from Halide::Internal::CodeGen_LLVM.

Definition at line 80 of file CodeGen_PTX_Dev.h.

◆ upgrade_type_for_storage()

Type Halide::Internal::CodeGen_PTX_Dev::upgrade_type_for_storage ( const Type &  t) const
overrideprotectedvirtual

Return the type that a given Halide type should be stored/loaded from memory as.

Reimplemented from Halide::Internal::CodeGen_LLVM.

◆ simt_intrinsic()

std::string Halide::Internal::CodeGen_PTX_Dev::simt_intrinsic ( const std::string &  name)
protected

Map from simt variable names (e.g. foo.__block_id_x) to the llvm ptx intrinsic functions to call to get them.

◆ supports_atomic_add()

bool Halide::Internal::CodeGen_PTX_Dev::supports_atomic_add ( const Type &  t) const
overrideprotectedvirtual

Reimplemented from Halide::Internal::CodeGen_LLVM.

◆ visit() [9/100]

void Halide::Internal::CodeGen_LLVM::visit
overrideprotected

Generate code for various IR nodes.

These can be overridden by architecture-specific code to perform peephole optimizations. The result of each is stored in value.

◆ visit() [10/100]

void Halide::Internal::CodeGen_LLVM::visit
overrideprotected

◆ visit() [11/100]

void Halide::Internal::CodeGen_LLVM::visit
overrideprotected

◆ visit() [12/100]

void Halide::Internal::CodeGen_LLVM::visit
overrideprotected

◆ visit() [13/100]

void Halide::Internal::CodeGen_LLVM::visit
overrideprotected

◆ visit() [14/100]

void Halide::Internal::CodeGen_LLVM::visit
overrideprotected

◆ visit() [15/100]

void Halide::Internal::CodeGen_LLVM::visit
overrideprotected

◆ visit() [16/100]

void Halide::Internal::CodeGen_LLVM::visit
overrideprotected

◆ visit() [17/100]

void Halide::Internal::CodeGen_LLVM::visit
overrideprotected

◆ visit() [18/100]

void Halide::Internal::CodeGen_LLVM::visit
overrideprotected

◆ visit() [19/100]

void Halide::Internal::CodeGen_LLVM::visit
overrideprotected

◆ visit() [20/100]

void Halide::Internal::CodeGen_LLVM::visit
overrideprotected

◆ visit() [21/100]

void Halide::Internal::CodeGen_LLVM::visit
overrideprotected

◆ visit() [22/100]

void Halide::Internal::CodeGen_LLVM::visit
overrideprotected

◆ visit() [23/100]

void Halide::Internal::CodeGen_LLVM::visit
overrideprotected

◆ visit() [24/100]

void Halide::Internal::CodeGen_LLVM::visit
overrideprotected

◆ visit() [25/100]

void Halide::Internal::CodeGen_LLVM::visit
overrideprotected

◆ visit() [26/100]

void Halide::Internal::CodeGen_LLVM::visit
overrideprotected

◆ visit() [27/100]

void Halide::Internal::CodeGen_LLVM::visit
overrideprotected

◆ visit() [28/100]

void Halide::Internal::CodeGen_LLVM::visit
overrideprotected

◆ visit() [29/100]

void Halide::Internal::CodeGen_LLVM::visit
overrideprotected

◆ visit() [30/100]

void Halide::Internal::CodeGen_LLVM::visit
overrideprotected

◆ visit() [31/100]

void Halide::Internal::CodeGen_LLVM::visit
overrideprotected

◆ visit() [32/100]

void Halide::Internal::CodeGen_LLVM::visit
overrideprotected

◆ visit() [33/100]

void Halide::Internal::CodeGen_LLVM::visit
overrideprotected

◆ visit() [34/100]

void Halide::Internal::CodeGen_LLVM::visit
overrideprotected

◆ visit() [35/100]

void Halide::Internal::CodeGen_LLVM::visit
overrideprotected

◆ visit() [36/100]

void Halide::Internal::CodeGen_LLVM::visit
overrideprotected

◆ visit() [37/100]

void Halide::Internal::CodeGen_LLVM::visit
overrideprotected

◆ visit() [38/100]

void Halide::Internal::CodeGen_LLVM::visit
overrideprotected

◆ visit() [39/100]

void Halide::Internal::CodeGen_LLVM::visit
overrideprotected

◆ visit() [40/100]

void Halide::Internal::CodeGen_LLVM::visit
overrideprotected

◆ visit() [41/100]

void Halide::Internal::CodeGen_LLVM::visit
overrideprotected

◆ visit() [42/100]

void Halide::Internal::CodeGen_LLVM::visit
overrideprotected

◆ visit() [43/100]

void Halide::Internal::CodeGen_LLVM::visit
overrideprotected

◆ visit() [44/100]

void Halide::Internal::CodeGen_LLVM::visit
overrideprotected

◆ visit() [45/100]

void Halide::Internal::CodeGen_LLVM::visit
overrideprotected

◆ visit() [46/100]

void Halide::Internal::CodeGen_LLVM::visit
overrideprotected

◆ visit() [47/100]

void Halide::Internal::CodeGen_LLVM::visit
overrideprotected

◆ visit() [48/100]

void Halide::Internal::CodeGen_LLVM::visit
override protected

◆ visit() [49/100]

void Halide::Internal::CodeGen_LLVM::visit
override protected

◆ visit() [50/100]

void Halide::Internal::CodeGen_LLVM::visit
override protected

◆ visit() [51/100]

void Halide::Internal::CodeGen_LLVM::visit
override protected

Generate code for an allocate node.

It has no default implementation; it must be handled in an architecture-specific way.

◆ visit() [52/100]

void Halide::Internal::CodeGen_LLVM::visit
override protected

Generate code for a free node.

It has no default implementation and must be handled in an architecture-specific way.

◆ visit() [53/100]

void Halide::Internal::CodeGen_LLVM::visit
override protected

These IR nodes should have been removed during lowering.

CodeGen_LLVM will error out if they are present.

◆ visit() [54/100]

void Halide::Internal::CodeGen_LLVM::visit
override protected

◆ visit() [55/100]

virtual void Halide::Internal::IRVisitor::visit
protected

◆ visit() [56/100]

virtual void Halide::Internal::IRVisitor::visit
protected

◆ visit() [57/100]

virtual void Halide::Internal::IRVisitor::visit
protected

◆ visit() [58/100]

virtual void Halide::Internal::IRVisitor::visit
protected

◆ visit() [59/100]

virtual void Halide::Internal::IRVisitor::visit
protected

◆ visit() [60/100]

virtual void Halide::Internal::IRVisitor::visit
protected

◆ visit() [61/100]

virtual void Halide::Internal::IRVisitor::visit
protected

◆ visit() [62/100]

virtual void Halide::Internal::IRVisitor::visit
protected

◆ visit() [63/100]

virtual void Halide::Internal::IRVisitor::visit
protected

◆ visit() [64/100]

virtual void Halide::Internal::IRVisitor::visit
protected

◆ visit() [65/100]

virtual void Halide::Internal::IRVisitor::visit
protected

◆ visit() [66/100]

virtual void Halide::Internal::IRVisitor::visit
protected

◆ visit() [67/100]

virtual void Halide::Internal::IRVisitor::visit
protected

◆ visit() [68/100]

virtual void Halide::Internal::IRVisitor::visit
protected

◆ visit() [69/100]

virtual void Halide::Internal::IRVisitor::visit
protected

◆ visit() [70/100]

virtual void Halide::Internal::IRVisitor::visit
protected

◆ visit() [71/100]

virtual void Halide::Internal::IRVisitor::visit
protected

◆ visit() [72/100]

virtual void Halide::Internal::IRVisitor::visit
protected

◆ visit() [73/100]

virtual void Halide::Internal::IRVisitor::visit
protected

◆ visit() [74/100]

virtual void Halide::Internal::IRVisitor::visit
protected

◆ visit() [75/100]

virtual void Halide::Internal::IRVisitor::visit
protected

◆ visit() [76/100]

virtual void Halide::Internal::IRVisitor::visit
protected

◆ visit() [77/100]

virtual void Halide::Internal::IRVisitor::visit
protected

◆ visit() [78/100]

virtual void Halide::Internal::IRVisitor::visit
protected

◆ visit() [79/100]

virtual void Halide::Internal::IRVisitor::visit
protected

◆ visit() [80/100]

virtual void Halide::Internal::IRVisitor::visit
protected

◆ visit() [81/100]

virtual void Halide::Internal::IRVisitor::visit
protected

◆ visit() [82/100]

virtual void Halide::Internal::IRVisitor::visit
protected

◆ visit() [83/100]

virtual void Halide::Internal::IRVisitor::visit
protected

◆ visit() [84/100]

virtual void Halide::Internal::IRVisitor::visit
protected

◆ visit() [85/100]

virtual void Halide::Internal::IRVisitor::visit
protected

◆ visit() [86/100]

virtual void Halide::Internal::IRVisitor::visit
protected

◆ visit() [87/100]

virtual void Halide::Internal::IRVisitor::visit
protected

◆ visit() [88/100]

virtual void Halide::Internal::IRVisitor::visit
protected

◆ visit() [89/100]

virtual void Halide::Internal::IRVisitor::visit
protected

◆ visit() [90/100]

virtual void Halide::Internal::IRVisitor::visit
protected

◆ visit() [91/100]

virtual void Halide::Internal::IRVisitor::visit
protected

◆ visit() [92/100]

virtual void Halide::Internal::IRVisitor::visit
protected

◆ visit() [93/100]

virtual void Halide::Internal::IRVisitor::visit
protected

◆ visit() [94/100]

virtual void Halide::Internal::IRVisitor::visit
protected

◆ visit() [95/100]

virtual void Halide::Internal::IRVisitor::visit
protected

◆ visit() [96/100]

virtual void Halide::Internal::IRVisitor::visit
protected

◆ visit() [97/100]

virtual void Halide::Internal::IRVisitor::visit
protected

◆ visit() [98/100]

virtual void Halide::Internal::IRVisitor::visit
protected

◆ visit() [99/100]

virtual void Halide::Internal::IRVisitor::visit
protected

◆ visit() [100/100]

virtual void Halide::Internal::IRVisitor::visit
protected

Member Data Documentation

◆ entry_block

llvm::BasicBlock* Halide::Internal::CodeGen_PTX_Dev::entry_block
protected

We hold onto the basic block at the start of the device function in order to inject allocas.

Definition at line 56 of file CodeGen_PTX_Dev.h.


The documentation for this class was generated from the following file: