Halide
FuseGPUThreadLoops.h
Go to the documentation of this file.
1
#ifndef HALIDE_FUSE_GPU_THREAD_LOOPS_H
2
#define HALIDE_FUSE_GPU_THREAD_LOOPS_H
3
4
/** \file
5
* Defines the lowering pass that fuses and normalizes loops over GPU
6
* threads to target CUDA, OpenCL, and Metal.
7
*/
8
9
#include "
Expr.h
"
10
11
namespace
Halide
{
12
namespace
Internal
{
13
14
/** Rewrite all GPU loops to have a min of zero. */
15
Stmt
zero_gpu_loop_mins
(
const
Stmt &s);
16
17
/** Converts Halide's GPGPU IR to the OpenCL/CUDA/Metal model. Within
18
* every loop over gpu block indices, fuse the inner loops over thread
19
* indices into a single loop (with predication to turn off
20
* threads). Push if conditions between GPU blocks to the innermost GPU threads.
21
* Also injects synchronization points as needed, and hoists
22
* shared allocations at the block level out into a single shared
23
* memory array, and heap allocations into a slice of a global pool
24
* allocated outside the kernel. */
25
Stmt
fuse_gpu_thread_loops
(Stmt s);
26
27
}
// namespace Internal
28
}
// namespace Halide
29
30
#endif
Halide::Internal::zero_gpu_loop_mins
Stmt zero_gpu_loop_mins(const Stmt &s)
Rewrite all GPU loops to have a min of zero.
Halide::Internal::fuse_gpu_thread_loops
Stmt fuse_gpu_thread_loops(Stmt s)
Converts Halide's GPGPU IR to the OpenCL/CUDA/Metal model.
Halide
This file defines the class FunctionDAG, which is our representation of a Halide pipeline,...
Definition:
AbstractGenerator.h:19
Halide::LinkageType::Internal
@ Internal
Not visible externally, similar to 'static' linkage in C.
Expr.h
src
FuseGPUThreadLoops.h
Generated by
1.8.17