Halide
CodeGen_PTX_Dev.h
Go to the documentation of this file.
1 #ifndef HALIDE_CODEGEN_PTX_DEV_H
2 #define HALIDE_CODEGEN_PTX_DEV_H
3 
4 /** \file
5  * Defines the code-generator for producing PTX device code for CUDA GPUs
6  */
7 
8 #include "CodeGen_GPU_Dev.h"
9 #include "CodeGen_GPU_Host.h"
10 #include "CodeGen_LLVM.h"
11 
12 namespace llvm {
13 class BasicBlock;
14 }
15 
16 namespace Halide {
17 namespace Internal {
18 
19 /** A code generator that emits PTX device code from a given Halide stmt. */
21 public:
24 
25  /** Create a PTX device code generator. */
26  CodeGen_PTX_Dev(Target host);
27  ~CodeGen_PTX_Dev() override;
28 
29  void add_kernel(Stmt stmt,
30  const std::string &name,
31  const std::vector<DeviceArgument> &args) override;
32 
33  static void test();
34 
35  std::vector<char> compile_to_src() override;
36  std::string get_current_kernel_name() override;
37 
38  void dump() override;
39 
40  std::string print_gpu_name(const std::string &name) override;
41 
42  std::string api_unique_name() override {
43  return "cuda";
44  }
45 
46 protected:
47  using CodeGen_LLVM::visit;
48 
49  /** (Re)initialize the PTX module. This is separate from compile, since
50  * a PTX device module will often have many kernels compiled into it for
51  * a single pipeline. */
52  /* override */ void init_module() override;
53 
54  /** We hold onto the basic block at the start of the device
55  * function in order to inject allocas */
56  llvm::BasicBlock *entry_block;
57 
58  /** Nodes for which we need to override default behavior for the GPU runtime */
59  // @{
60  void visit(const Call *) override;
61  void visit(const For *) override;
62  void visit(const Allocate *) override;
63  void visit(const Free *) override;
64  void visit(const AssertStmt *) override;
65  void visit(const Load *) override;
66  void visit(const Store *) override;
67  void visit(const Atomic *) override;
68  void codegen_vector_reduce(const VectorReduce *op, const Expr &init) override;
69  // @}
70 
71  std::string march() const;
72  std::string mcpu() const override;
73  std::string mattrs() const override;
74  bool use_soft_float_abi() const override;
75  int native_vector_bits() const override;
76  bool promote_indices() const override {
77  return false;
78  }
79 
80  Type upgrade_type_for_arithmetic(const Type &t) const override {
81  return t;
82  }
83  Type upgrade_type_for_storage(const Type &t) const override;
84 
85  /** Map from SIMT variable names (e.g. foo.__block_id_x) to the names of
86  * the LLVM PTX intrinsic functions to call to get them. */
87  std::string simt_intrinsic(const std::string &name);
88 
89  bool supports_atomic_add(const Type &t) const override;
90 };
91 
92 } // namespace Internal
93 } // namespace Halide
94 
95 #endif
Halide::Internal::Allocate
Allocate a scratch area called with the given name, type, and size.
Definition: IR.h:352
llvm
Definition: CodeGen_Internal.h:19
Halide::Internal::CodeGen_LLVM
A code generator abstract base class.
Definition: CodeGen_LLVM.h:58
Halide::Internal::VectorReduce
Horizontally reduce a vector to a scalar or narrower vector using the given commutative and associati...
Definition: IR.h:827
Halide::Internal::For
A for loop.
Definition: IR.h:698
Halide::Internal::CodeGen_PTX_Dev::march
std::string march() const
Halide::Internal::CodeGen_PTX_Dev::CodeGen_PTX_Dev
CodeGen_PTX_Dev(Target host)
Create a PTX device code generator.
Halide::Internal::CodeGen_PTX_Dev::test
static void test()
CodeGen_LLVM.h
Halide::Internal::CodeGen_PTX_Dev::dump
void dump() override
Halide::Internal::CodeGen_ARM
A code generator that emits ARM code from a given Halide stmt.
Definition: CodeGen_ARM.h:16
Halide::Internal::CodeGen_PTX_Dev::supports_atomic_add
bool supports_atomic_add(const Type &t) const override
Halide::Internal::CodeGen_PTX_Dev::native_vector_bits
int native_vector_bits() const override
What's the natural vector bit-width to use for loads, stores, etc.
Halide::Internal::Stmt
A reference-counted handle to a statement node.
Definition: Expr.h:409
Halide::Type
Types in the halide type system.
Definition: Type.h:269
Halide::Internal::Load
Load a value from a named symbol if predicate is true.
Definition: IR.h:199
Halide::Internal::Free
Free the resources associated with the given buffer.
Definition: IR.h:388
Halide
This file defines the class FunctionDAG, which is our representation of a Halide pipeline,...
Definition: AddAtomicMutex.h:21
Halide::LinkageType::Internal
@ Internal
Not visible externally, similar to 'static' linkage in C.
Halide::Internal::CodeGen_GPU_Dev
A code generator that emits GPU code from a given Halide stmt.
Definition: CodeGen_GPU_Dev.h:17
Halide::Internal::CodeGen_PTX_Dev::compile_to_src
std::vector< char > compile_to_src() override
Halide::Internal::CodeGen_PTX_Dev
A code generator that emits PTX device code from a given Halide stmt.
Definition: CodeGen_PTX_Dev.h:20
CodeGen_GPU_Dev.h
Halide::Internal::CodeGen_PTX_Dev::mcpu
std::string mcpu() const override
What should be passed as -mcpu, -mattrs, and related for compilation.
Halide::Internal::Store
Store a 'value' to the buffer called 'name' at a given 'index' if 'predicate' is true.
Definition: IR.h:315
Halide::Internal::CodeGen_X86
A code generator that emits x86 code from a given Halide stmt.
Definition: CodeGen_X86.h:19
Halide::Internal::CodeGen_PTX_Dev::simt_intrinsic
std::string simt_intrinsic(const std::string &name)
Map from simt variable names (e.g. foo.__block_id_x) to the llvm ptx intrinsic functions to call to get them.
Halide::Internal::CodeGen_PTX_Dev::visit
void visit(const Call *) override
Nodes for which we need to override default behavior for the GPU runtime.
Halide::Internal::CodeGen_LLVM::visit
void visit(const IntImm *) override
Generate code for various IR nodes.
CodeGen_GPU_Host.h
Halide::Internal::CodeGen_PTX_Dev::print_gpu_name
std::string print_gpu_name(const std::string &name) override
Returns the specified name transformed by the variable naming rules for the GPU language backend.
Halide::Internal::AssertStmt
If the 'condition' is false, then evaluate and return the message, which should be a call to an error...
Definition: IR.h:276
Halide::Internal::CodeGen_PTX_Dev::init_module
void init_module() override
(Re)initialize the PTX module.
Halide::Internal::CodeGen_PTX_Dev::upgrade_type_for_storage
Type upgrade_type_for_storage(const Type &t) const override
Return the type that a given Halide type should be stored/loaded from memory as.
Halide::Internal::CodeGen_PTX_Dev::add_kernel
void add_kernel(Stmt stmt, const std::string &name, const std::vector< DeviceArgument > &args) override
Compile a GPU kernel into the module.
Halide::Internal::Call
A function call.
Definition: IR.h:464
Halide::Internal::CodeGen_PTX_Dev::use_soft_float_abi
bool use_soft_float_abi() const override
Halide::Internal::CodeGen_PTX_Dev::get_current_kernel_name
std::string get_current_kernel_name() override
Halide::Internal::CodeGen_GPU_Host
A code generator that emits GPU code from a given Halide stmt.
Definition: CodeGen_GPU_Host.h:27
Halide::Internal::CodeGen_PTX_Dev::entry_block
llvm::BasicBlock * entry_block
We hold onto the basic block at the start of the device function in order to inject allocas.
Definition: CodeGen_PTX_Dev.h:56
Halide::Internal::CodeGen_PTX_Dev::upgrade_type_for_arithmetic
Type upgrade_type_for_arithmetic(const Type &t) const override
Return the type in which arithmetic should be done for the given storage type.
Definition: CodeGen_PTX_Dev.h:80
Halide::Internal::CodeGen_PTX_Dev::codegen_vector_reduce
void codegen_vector_reduce(const VectorReduce *op, const Expr &init) override
Compile a horizontal reduction that starts with an explicit initial value.
Halide::Expr
A fragment of Halide syntax.
Definition: Expr.h:256
Halide::Internal::CodeGen_PTX_Dev::promote_indices
bool promote_indices() const override
Should indexing math be promoted to 64-bit on platforms with 64-bit pointers?
Definition: CodeGen_PTX_Dev.h:76
Halide::Internal::CodeGen_PTX_Dev::mattrs
std::string mattrs() const override
Halide::Internal::Atomic
Lock all the Store nodes in the body statement.
Definition: IR.h:809
Halide::Internal::CodeGen_PTX_Dev::api_unique_name
std::string api_unique_name() override
This routine returns the GPU API name that is combined into runtime routine names to ensure each GPU ...
Definition: CodeGen_PTX_Dev.h:42
Halide::Target
A struct representing a target machine and os to generate code for.
Definition: Target.h:19
Halide::Internal::CodeGen_PTX_Dev::~CodeGen_PTX_Dev
~CodeGen_PTX_Dev() override