Halide
FuseGPUThreadLoops.h
Go to the documentation of this file.
1 #ifndef HALIDE_FUSE_GPU_THREAD_LOOPS_H
2 #define HALIDE_FUSE_GPU_THREAD_LOOPS_H
3 
4 /** \file
5  * Defines the lowering pass that fuses and normalizes loops over GPU
6  * threads to target CUDA, OpenCL, and Metal.
7  */
8 
9 #include "Expr.h"
10 
11 namespace Halide {
12 namespace Internal {
13 
14 /** Rewrite all GPU loops to have a min of zero. */
15 Stmt zero_gpu_loop_mins(const Stmt &s);
16 
17 /** Converts Halide's GPGPU IR to the OpenCL/CUDA/Metal model. Within
18  * every loop over GPU block indices, fuses the inner loops over thread
19  * indices into a single loop (with predication to turn off
20  * threads). Pushes `if` conditions between GPU blocks down to the innermost
21  * GPU threads. Also injects synchronization points as needed, and hoists
22  * shared allocations at the block level out into a single shared
23  * memory array, and heap allocations into a slice of a global pool
24  * allocated outside the kernel. */
25 Stmt fuse_gpu_thread_loops(Stmt s);
26 
27 } // namespace Internal
28 } // namespace Halide
29 
30 #endif
Halide::Internal::zero_gpu_loop_mins
Stmt zero_gpu_loop_mins(const Stmt &s)
Rewrite all GPU loops to have a min of zero.
Halide::Internal::fuse_gpu_thread_loops
Stmt fuse_gpu_thread_loops(Stmt s)
Converts Halide's GPGPU IR to the OpenCL/CUDA/Metal model.
Halide
This file defines the class FunctionDAG, which is our representation of a Halide pipeline,...
Definition: AbstractGenerator.h:19
Halide::LinkageType::Internal
@ Internal
Not visible externally, similar to 'static' linkage in C.
Expr.h