Halide
CodeGen_GPU_Dev.h
Go to the documentation of this file.
1 #ifndef HALIDE_CODEGEN_GPU_DEV_H
2 #define HALIDE_CODEGEN_GPU_DEV_H
3 
4 /** \file
5  * Defines the code-generator interface for producing GPU device code
6  */
7 #include <string>
8 #include <vector>
9 
10 #include "DeviceArgument.h"
11 #include "Expr.h"
12 
13 namespace Halide {
14 namespace Internal {
15 
16 /** A code generator that emits GPU code from a given Halide stmt. */
17 struct CodeGen_GPU_Dev {
18  virtual ~CodeGen_GPU_Dev();
19 
20  /** Compile a GPU kernel into the module. This may be called many times
21  * with different kernels, which will all be accumulated into a single
22  * source module shared by a given Halide pipeline. */
23  virtual void add_kernel(Stmt stmt,
24  const std::string &name,
25  const std::vector<DeviceArgument> &args) = 0;
26 
27  /** (Re)initialize the GPU kernel module. This is separate from compile,
28  * since a GPU device module will often have many kernels compiled into it
29  * for a single pipeline. */
30  virtual void init_module() = 0;
31 
32  virtual std::vector<char> compile_to_src() = 0;
33 
34  virtual std::string get_current_kernel_name() = 0;
35 
36  virtual void dump() = 0;
37 
38  /** This routine returns the GPU API name that is combined into
39  * runtime routine names to ensure each GPU API has a unique
40  * name.
41  */
42  virtual std::string api_unique_name() = 0;
43 
44  /** Returns the specified name transformed by the variable naming rules
45  * for the GPU language backend. Used to determine the name of a parameter
46  * during host codegen. */
47  virtual std::string print_gpu_name(const std::string &name) = 0;
48 
49  /** Allows the GPU device specific code to request halide_type_t
50  * values to be passed to the kernel_run routine rather than just
51  * argument type sizes.
52  */
53  virtual bool kernel_run_takes_types() const {
54  return false;
55  }
56 
57  static bool is_gpu_var(const std::string &name);
58  static bool is_gpu_block_var(const std::string &name);
59  static bool is_gpu_thread_var(const std::string &name);
60 
61  /** Checks if expr is block uniform, i.e. does not depend on a thread
62  * var. */
63  static bool is_block_uniform(const Expr &expr);
64  /** Checks if the buffer is a candidate for constant storage. Most
65  * GPUs (APIs) support a constant memory storage class that cannot be
66  * written to and performs well for block uniform accesses. A buffer is a
67  * candidate for constant storage if it is never written to, and loads are
68  * uniform within the workgroup. */
69  static bool is_buffer_constant(const Stmt &kernel, const std::string &buffer);
70 
71  /** A mask describing which type of memory fence to use for the gpu_thread_barrier()
72  * intrinsic. Not all GPU APIs support all types.
73  */
74  enum MemoryFenceType {
75  None = 0, // No fence required (just a sync)
76  Device = 1, // Device/global memory fence
77  Shared = 2 // Threadgroup/shared memory fence
78  };
79 };
80 
81 } // namespace Internal
82 } // namespace Halide
83 
84 #endif
Halide::Internal::CodeGen_GPU_Dev::Device
@ Device
Definition: CodeGen_GPU_Dev.h:76
Halide::Internal::CodeGen_GPU_Dev::~CodeGen_GPU_Dev
virtual ~CodeGen_GPU_Dev()
Halide::Internal::CodeGen_GPU_Dev::is_gpu_var
static bool is_gpu_var(const std::string &name)
Halide::Internal::CodeGen_GPU_Dev::dump
virtual void dump()=0
Halide::Internal::CodeGen_GPU_Dev::is_gpu_block_var
static bool is_gpu_block_var(const std::string &name)
Halide::Internal::CodeGen_GPU_Dev::print_gpu_name
virtual std::string print_gpu_name(const std::string &name)=0
Returns the specified name transformed by the variable naming rules for the GPU language backend.
Halide::Internal::CodeGen_GPU_Dev::Shared
@ Shared
Definition: CodeGen_GPU_Dev.h:77
Halide::Internal::Stmt
A reference-counted handle to a statement node.
Definition: Expr.h:409
Halide
This file defines the class FunctionDAG, which is our representation of a Halide pipeline,...
Definition: AddAtomicMutex.h:21
Halide::Internal::CodeGen_GPU_Dev::api_unique_name
virtual std::string api_unique_name()=0
This routine returns the GPU API name that is combined into runtime routine names to ensure each GPU ...
Halide::Internal::CodeGen_GPU_Dev::MemoryFenceType
MemoryFenceType
A mask describing which type of memory fence to use for the gpu_thread_barrier() intrinsic.
Definition: CodeGen_GPU_Dev.h:74
Halide::LinkageType::Internal
@ Internal
Not visible externally, similar to 'static' linkage in C.
Halide::Internal::CodeGen_GPU_Dev
A code generator that emits GPU code from a given Halide stmt.
Definition: CodeGen_GPU_Dev.h:17
Halide::Internal::CodeGen_GPU_Dev::add_kernel
virtual void add_kernel(Stmt stmt, const std::string &name, const std::vector< DeviceArgument > &args)=0
Compile a GPU kernel into the module.
Halide::Internal::CodeGen_GPU_Dev::is_gpu_thread_var
static bool is_gpu_thread_var(const std::string &name)
Halide::Internal::CodeGen_GPU_Dev::is_block_uniform
static bool is_block_uniform(const Expr &expr)
Checks if expr is block uniform, i.e.
Expr.h
Halide::Internal::CodeGen_GPU_Dev::compile_to_src
virtual std::vector< char > compile_to_src()=0
DeviceArgument.h
Halide::Internal::CodeGen_GPU_Dev::is_buffer_constant
static bool is_buffer_constant(const Stmt &kernel, const std::string &buffer)
Checks if the buffer is a candidate for constant storage.
Halide::Output::stmt
@ stmt
Halide::Internal::CodeGen_GPU_Dev::kernel_run_takes_types
virtual bool kernel_run_takes_types() const
Allows the GPU device specific code to request halide_type_t values to be passed to the kernel_run ro...
Definition: CodeGen_GPU_Dev.h:53
Halide::Expr
A fragment of Halide syntax.
Definition: Expr.h:256
Halide::Internal::CodeGen_GPU_Dev::get_current_kernel_name
virtual std::string get_current_kernel_name()=0
Halide::Internal::CodeGen_GPU_Dev::init_module
virtual void init_module()=0
(Re)initialize the GPU kernel module.
Halide::Internal::CodeGen_GPU_Dev::None
@ None
Definition: CodeGen_GPU_Dev.h:75