// Halide 19.0.0 - Halide compiler and libraries
// File: cuda_functions.h
// Note that this header intentionally does not use include
// guards. The intended usage of this file is to define the meaning of
// the CUDA_FN macros, and then include this file, sometimes
// repeatedly within the same compilation unit.
//
// Each entry below is one macro invocation describing a single function:
//   CUDA_FN(ret, fn, args)                 - return type, name, parenthesized
//                                            parameter list.
//   CUDA_FN_OPTIONAL(ret, fn, args)        - same shape as CUDA_FN.
//                                            NOTE(review): presumably marks a
//                                            function that may be missing from
//                                            the driver; confirm at the include
//                                            sites.
//   CUDA_FN_3020(ret, fn, fn_3020, args)   - as CUDA_FN, plus an alternate
//   CUDA_FN_4000(ret, fn, fn_4000, args)     symbol name (always the `_v2`
//                                            variant in this list).
//                                            NOTE(review): the 3020 / 4000
//                                            suffixes look like the API version
//                                            that introduced the `_v2` symbol;
//                                            confirm.
//
// Any macro the includer leaves undefined gets a default below, and all four
// are #undef'd at the end of the file so the next inclusion can redefine them.

// By default, plain and optional entries expand to nothing.
#ifndef CUDA_FN
#define CUDA_FN(ret, fn, args)
#endif
#ifndef CUDA_FN_OPTIONAL
#define CUDA_FN_OPTIONAL(ret, fn, args)
#endif
// By default, versioned entries drop the alternate name and forward to CUDA_FN,
// so an includer that only defines CUDA_FN still sees every non-optional entry.
#ifndef CUDA_FN_3020
#define CUDA_FN_3020(ret, fn, fn_3020, args) CUDA_FN(ret, fn, args)
#endif
#ifndef CUDA_FN_4000
#define CUDA_FN_4000(ret, fn, fn_4000, args) CUDA_FN(ret, fn, args)
#endif

CUDA_FN(CUresult, cuInit, (unsigned int Flags));
CUDA_FN(CUresult, cuDeviceGetCount, (int *count));
CUDA_FN(CUresult, cuDeviceGet, (CUdevice * device, int ordinal));
CUDA_FN(CUresult, cuDeviceGetAttribute, (int *, CUdevice_attribute attrib, CUdevice dev));
CUDA_FN(CUresult, cuDeviceGetName, (char *, int len, CUdevice dev));
CUDA_FN_3020(CUresult, cuDeviceTotalMem, cuDeviceTotalMem_v2, (size_t *, CUdevice dev));
CUDA_FN_3020(CUresult, cuCtxCreate, cuCtxCreate_v2, (CUcontext * pctx, unsigned int flags, CUdevice dev));
CUDA_FN_4000(CUresult, cuCtxDestroy, cuCtxDestroy_v2, (CUcontext pctx));
CUDA_FN(CUresult, cuProfilerStop, ());
CUDA_FN(CUresult, cuCtxGetApiVersion, (CUcontext ctx, unsigned int *version));
CUDA_FN(CUresult, cuCtxGetDevice, (CUdevice *));
CUDA_FN(CUresult, cuModuleLoadData, (CUmodule * module, const void *image));
CUDA_FN(CUresult, cuModuleLoadDataEx, (CUmodule * module, const void *image, unsigned int numOptions, CUjit_option *options, void **optionValues));
CUDA_FN(CUresult, cuModuleUnload, (CUmodule module));
CUDA_FN(CUresult, cuModuleGetFunction, (CUfunction * hfunc, CUmodule hmod, const char *name));
CUDA_FN_3020(CUresult, cuMemAlloc, cuMemAlloc_v2, (CUdeviceptr * dptr, size_t bytesize));
CUDA_FN_3020(CUresult, cuMemFree, cuMemFree_v2, (CUdeviceptr dptr));
CUDA_FN_3020(CUresult, cuMemcpyHtoD, cuMemcpyHtoD_v2, (CUdeviceptr dstDevice, const void *srcHost, size_t ByteCount));
CUDA_FN_3020(CUresult, cuMemcpyDtoH, cuMemcpyDtoH_v2, (void *dstHost, CUdeviceptr srcDevice, size_t ByteCount));
// The destination here is a CUdeviceptr, so it is named dstDevice.
CUDA_FN_3020(CUresult, cuMemcpyDtoD, cuMemcpyDtoD_v2, (CUdeviceptr dstDevice, CUdeviceptr srcDevice, size_t ByteCount));

CUDA_FN_3020(CUresult, cuMemcpyHtoDAsync, cuMemcpyHtoDAsync_v2, (CUdeviceptr dstDevice, const void *srcHost, size_t ByteCount, CUstream stream));
CUDA_FN_3020(CUresult, cuMemcpyDtoHAsync, cuMemcpyDtoHAsync_v2, (void *dstHost, CUdeviceptr srcDevice, size_t ByteCount, CUstream stream));
CUDA_FN_3020(CUresult, cuMemcpyDtoDAsync, cuMemcpyDtoDAsync_v2, (CUdeviceptr dstDevice, CUdeviceptr srcDevice, size_t ByteCount, CUstream stream));

CUDA_FN_3020(CUresult, cuMemcpy3D, cuMemcpy3D_v2, (const CUDA_MEMCPY3D *pCopy));
CUDA_FN(CUresult, cuLaunchKernel, (CUfunction f, unsigned int gridDimX, unsigned int gridDimY, unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ, unsigned int sharedMemBytes, CUstream hStream, void **kernelParams, void **extra));
CUDA_FN(CUresult, cuCtxSynchronize, ());

CUDA_FN_4000(CUresult, cuCtxPushCurrent, cuCtxPushCurrent_v2, (CUcontext ctx));
CUDA_FN_4000(CUresult, cuCtxPopCurrent, cuCtxPopCurrent_v2, (CUcontext * pctx));

CUDA_FN(CUresult, cuPointerGetAttribute, (void *result, int query, CUdeviceptr ptr));

CUDA_FN_OPTIONAL(CUresult, cuStreamSynchronize, (CUstream hStream));

// Leave no macro defined on exit: the next includer must be free to give
// each macro a new meaning before including this file again.
#undef CUDA_FN
#undef CUDA_FN_OPTIONAL
#undef CUDA_FN_3020
#undef CUDA_FN_4000