Halide
cuda_functions.h
Go to the documentation of this file.
1 // Note that this header intentionally does not use include
2 // guards. The intended usage of this file is to define the meaning of
3 // the CUDA_FN macros, and then include this file, sometimes
4 // repeatedly within the same compilation unit.
5 
// Fallback definitions for the four hook macros. Any macro the including
// file has not already defined receives a default here, so every entry in
// the list below still expands to something well-formed.
//
// CUDA_FN(ret, fn, args): the fallback expands to nothing.
6 #ifndef CUDA_FN
7 #define CUDA_FN(ret, fn, args)
8 #endif
// CUDA_FN_OPTIONAL(ret, fn, args): the fallback also expands to nothing.
9 #ifndef CUDA_FN_OPTIONAL
10 #define CUDA_FN_OPTIONAL(ret, fn, args)
11 #endif
// CUDA_FN_3020(ret, fn, fn_3020, args): the fallback discards the fn_3020
// name and forwards the remaining arguments to CUDA_FN.
// NOTE(review): "3020" looks like a CUDA API version number -- confirm.
12 #ifndef CUDA_FN_3020
13 #define CUDA_FN_3020(ret, fn, fn_3020, args) CUDA_FN(ret, fn, args)
14 #endif
// CUDA_FN_4000(ret, fn, fn_4000, args): same forwarding scheme as
// CUDA_FN_3020; the fn_4000 name is discarded by the fallback.
15 #ifndef CUDA_FN_4000
16 #define CUDA_FN_4000(ret, fn, fn_4000, args) CUDA_FN(ret, fn, args)
17 #endif
18 
// Entry list, part 1: initialization, device queries, contexts, modules and
// synchronous memory operations. Each line names a return type, a function
// name and a parenthesized parameter list. Entries written with
// CUDA_FN_3020 / CUDA_FN_4000 additionally supply a `_v2` name, which the
// fallback definitions of those macros drop before forwarding to CUDA_FN.
19 CUDA_FN(CUresult, cuInit, (unsigned int Flags));
20 CUDA_FN(CUresult, cuDeviceGetCount, (int *count));
21 CUDA_FN(CUresult, cuDeviceGet, (CUdevice * device, int ordinal));
22 CUDA_FN(CUresult, cuDeviceGetAttribute, (int *, CUdevice_attribute attrib, CUdevice dev));
23 CUDA_FN(CUresult, cuDeviceGetName, (char *, int len, CUdevice dev));
24 CUDA_FN_3020(CUresult, cuDeviceTotalMem, cuDeviceTotalMem_v2, (size_t *, CUdevice dev));
25 CUDA_FN_3020(CUresult, cuCtxCreate, cuCtxCreate_v2, (CUcontext * pctx, unsigned int flags, CUdevice dev));
26 CUDA_FN_4000(CUresult, cuCtxDestroy, cuCtxDestroy_v2, (CUcontext pctx));
27 CUDA_FN(CUresult, cuProfilerStop, ());
28 CUDA_FN(CUresult, cuCtxGetApiVersion, (CUcontext ctx, unsigned int *version));
29 CUDA_FN(CUresult, cuCtxGetDevice, (CUdevice *));
30 CUDA_FN(CUresult, cuModuleLoadData, (CUmodule * module, const void *image));
31 CUDA_FN(CUresult, cuModuleLoadDataEx, (CUmodule * module, const void *image, unsigned int numOptions, CUjit_option *options, void **optionValues));
32 CUDA_FN(CUresult, cuModuleUnload, (CUmodule module));
33 CUDA_FN(CUresult, cuModuleGetFunction, (CUfunction * hfunc, CUmodule hmod, const char *name));
34 CUDA_FN_3020(CUresult, cuMemAlloc, cuMemAlloc_v2, (CUdeviceptr * dptr, size_t bytesize));
35 CUDA_FN_3020(CUresult, cuMemFree, cuMemFree_v2, (CUdeviceptr dptr));
36 CUDA_FN_3020(CUresult, cuMemcpyHtoD, cuMemcpyHtoD_v2, (CUdeviceptr dstDevice, const void *srcHost, size_t ByteCount));
37 CUDA_FN_3020(CUresult, cuMemcpyDtoH, cuMemcpyDtoH_v2, (void *dstHost, CUdeviceptr srcDevice, size_t ByteCount));
// The destination of a device-to-device copy is a CUdeviceptr, so it is
// named dstDevice, matching the destination parameter of cuMemcpyHtoD.
38 CUDA_FN_3020(CUresult, cuMemcpyDtoD, cuMemcpyDtoD_v2, (CUdeviceptr dstDevice, CUdeviceptr srcDevice, size_t ByteCount));
39 
// Entry list, part 2: the copy entries whose names end in "Async". Each
// takes the same leading parameters as its non-Async counterpart plus a
// trailing CUstream. All three are written with CUDA_FN_3020 and supply a
// `_v2` name.
40 CUDA_FN_3020(CUresult, cuMemcpyHtoDAsync, cuMemcpyHtoDAsync_v2, (CUdeviceptr dstDevice, const void *srcHost, size_t ByteCount, CUstream stream));
41 CUDA_FN_3020(CUresult, cuMemcpyDtoHAsync, cuMemcpyDtoHAsync_v2, (void *dstHost, CUdeviceptr srcDevice, size_t ByteCount, CUstream stream));
// The destination here is a CUdeviceptr, so it is named dstDevice, matching
// the destination parameter of cuMemcpyHtoDAsync.
42 CUDA_FN_3020(CUresult, cuMemcpyDtoDAsync, cuMemcpyDtoDAsync_v2, (CUdeviceptr dstDevice, CUdeviceptr srcDevice, size_t ByteCount, CUstream stream));
43 
// Entry list, part 3. cuMemcpy3D is written with CUDA_FN_3020 and therefore
// supplies the extra name cuMemcpy3D_v2; cuLaunchKernel and cuCtxSynchronize
// use plain CUDA_FN and have a single name.
44 CUDA_FN_3020(CUresult, cuMemcpy3D, cuMemcpy3D_v2, (const CUDA_MEMCPY3D *pCopy));
45 CUDA_FN(CUresult, cuLaunchKernel, (CUfunction f, unsigned int gridDimX, unsigned int gridDimY, unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ, unsigned int sharedMemBytes, CUstream hStream, void **kernelParams, void **extra));
46 CUDA_FN(CUresult, cuCtxSynchronize, ());
47 
// Entry list, part 4: the context push/pop pair. Both are written with
// CUDA_FN_4000 and supply a `_v2` name. Push takes a CUcontext by value;
// pop takes a CUcontext pointer.
48 CUDA_FN_4000(CUresult, cuCtxPushCurrent, cuCtxPushCurrent_v2, (CUcontext ctx));
49 CUDA_FN_4000(CUresult, cuCtxPopCurrent, cuCtxPopCurrent_v2, (CUcontext * pctx));
50 
// Entry list, part 5. In this declaration the `query` selector is a plain
// int and `result` is an untyped void pointer.
51 CUDA_FN(CUresult, cuPointerGetAttribute, (void *result, int query, CUdeviceptr ptr));
52 
// Entry list, part 6: the only entry written with CUDA_FN_OPTIONAL. How an
// optional entry differs from a regular one is decided by the including
// file's definition of that macro.
// NOTE(review): presumably the symbol may be missing at load time -- confirm.
53 CUDA_FN_OPTIONAL(CUresult, cuStreamSynchronize, (CUstream hStream));
54 
// Remove all four hook macros so the including file can define them
// differently and include this header again (there are no include guards).
55 #undef CUDA_FN
56 #undef CUDA_FN_OPTIONAL
57 #undef CUDA_FN_3020
58 #undef CUDA_FN_4000
Halide::Runtime::Internal::Cuda::CUDA_MEMCPY3D
struct Halide::Runtime::Internal::Cuda::CUDA_MEMCPY3D_st CUDA_MEMCPY3D
Halide::Runtime::Internal::Cuda::CUresult
CUresult
Definition: mini_cuda.h:43
CUDA_FN_3020
#define CUDA_FN_3020(ret, fn, fn_3020, args)
Definition: cuda_functions.h:13
Halide::Runtime::Internal::Cuda::CUfunction
struct CUfunc_st * CUfunction
CUDA function.
Definition: mini_cuda.h:24
CUDA_FN_4000
#define CUDA_FN_4000(ret, fn, fn_4000, args)
Definition: cuda_functions.h:16
Halide::Runtime::Internal::Cuda::CUcontext
struct CUctx_st * CUcontext
CUDA context.
Definition: mini_cuda.h:22
Halide::Runtime::Internal::Cuda::CUdevice_attribute
CUdevice_attribute
Definition: mini_cuda.h:101
Halide::Runtime::Internal::Cuda::CUdevice
int CUdevice
CUDA device.
Definition: mini_cuda.h:21
Halide::Runtime::Internal::Cuda::CUmodule
struct CUmod_st * CUmodule
CUDA module.
Definition: mini_cuda.h:23
Halide::Runtime::Internal::Cuda::CUstream
struct CUstream_st * CUstream
CUDA stream.
Definition: mini_cuda.h:25
CUDA_FN
#define CUDA_FN(ret, fn, args)
Definition: cuda_functions.h:7
Halide::Runtime::Internal::Cuda::CUjit_option
enum Halide::Runtime::Internal::Cuda::CUjit_option_enum CUjit_option
Halide::Runtime::Internal::Cuda::CUdeviceptr
unsigned int CUdeviceptr
Definition: mini_cuda.h:18
CUDA_FN_OPTIONAL
#define CUDA_FN_OPTIONAL(ret, fn, args)
Definition: cuda_functions.h:10