1 #ifndef HALIDE_MINI_CUDA_H
2 #define HALIDE_MINI_CUDA_H
9 #if defined(WINDOWS) && defined(BITS_32)
10 #define CUDAAPI __stdcall
232 #define CU_POINTER_ATTRIBUTE_CONTEXT 1
struct CUarray_st * CUarray
struct CUevent_st * CUevent
CUDA event.
struct CUmod_st * CUmodule
CUDA module.
@ CUDA_ERROR_OPERATING_SYSTEM
@ CUDA_ERROR_OUT_OF_MEMORY
@ CUDA_ERROR_PEER_ACCESS_UNSUPPORTED
@ CUDA_ERROR_PROFILER_ALREADY_STARTED
@ CUDA_ERROR_ALREADY_ACQUIRED
@ CUDA_ERROR_INVALID_HANDLE
@ CUDA_ERROR_PROFILER_DISABLED
@ CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES
@ CUDA_ERROR_LAUNCH_TIMEOUT
@ CUDA_ERROR_NO_BINARY_FOR_GPU
@ CUDA_ERROR_CONTEXT_ALREADY_IN_USE
@ CUDA_ERROR_ARRAY_IS_MAPPED
@ CUDA_ERROR_ILLEGAL_ADDRESS
@ CUDA_ERROR_PEER_ACCESS_NOT_ENABLED
@ CUDA_ERROR_MISALIGNED_ADDRESS
@ CUDA_ERROR_SHARED_OBJECT_INIT_FAILED
@ CUDA_ERROR_NOT_PERMITTED
@ CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED
@ CUDA_ERROR_ILLEGAL_INSTRUCTION
@ CUDA_ERROR_NOT_MAPPED_AS_POINTER
@ CUDA_ERROR_INVALID_DEVICE
@ CUDA_ERROR_FILE_NOT_FOUND
@ CUDA_ERROR_INVALID_GRAPHICS_CONTEXT
@ CUDA_ERROR_UNSUPPORTED_LIMIT
@ CUDA_ERROR_NOT_INITIALIZED
@ CUDA_ERROR_INVALID_ADDRESS_SPACE
@ CUDA_ERROR_INVALID_CONTEXT
@ CUDA_ERROR_PROFILER_NOT_INITIALIZED
@ CUDA_ERROR_DEINITIALIZED
@ CUDA_ERROR_LAUNCH_FAILED
@ CUDA_ERROR_CONTEXT_ALREADY_CURRENT
@ CUDA_ERROR_UNMAP_FAILED
@ CUDA_ERROR_CONTEXT_IS_DESTROYED
@ CUDA_ERROR_ALREADY_MAPPED
@ CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING
@ CUDA_ERROR_ECC_UNCORRECTABLE
@ CUDA_ERROR_INVALID_SOURCE
@ CUDA_ERROR_PROFILER_ALREADY_STOPPED
@ CUDA_ERROR_NOT_MAPPED_AS_ARRAY
@ CUDA_ERROR_JIT_COMPILER_NOT_FOUND
@ CUDA_ERROR_INVALID_IMAGE
@ CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND
@ CUDA_ERROR_INVALID_VALUE
@ CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE
@ CUDA_ERROR_NVLINK_UNCORRECTABLE
@ CUDA_ERROR_NOT_SUPPORTED
enum Halide::Runtime::Internal::Cuda::CUmemorytype_enum CUmemorytype
struct CUfunc_st * CUfunction
CUDA function.
struct CUstream_st * CUstream
CUDA stream.
@ CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES
@ CU_JIT_ERROR_LOG_BUFFER
@ CU_JIT_THREADS_PER_BLOCK
@ CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES
@ CU_JIT_TARGET_FROM_CUCONTEXT
@ CU_JIT_OPTIMIZATION_LEVEL
@ CU_JIT_FALLBACK_STRATEGY
struct CUctx_st * CUcontext
CUDA context.
struct Halide::Runtime::Internal::Cuda::CUDA_MEMCPY3D_st CUDA_MEMCPY3D
enum Halide::Runtime::Internal::Cuda::CUjit_option_enum CUjit_option
@ CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_WIDTH
Maximum 2D linear texture width.
@ CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_HEIGHT
Deprecated, use CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_HEIGHT.
@ CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_PITCH
Maximum 2D linear texture pitch in bytes.
@ CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y
Maximum grid dimension Y.
@ CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LINEAR_WIDTH
Maximum 1D linear texture width.
@ CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS
Device can possibly execute multiple kernels concurrently.
@ CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_WIDTH
Maximum 2D surface width.
@ CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_HEIGHT
Maximum 2D texture height.
@ CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE
Peak memory clock frequency in kilohertz.
@ CU_DEVICE_ATTRIBUTE_CLOCK_RATE
Typical clock frequency in kilohertz.
@ CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_MIPMAPPED_WIDTH
Maximum mipmapped 1D texture width.
@ CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_HEIGHT
Maximum mipmapped 2D texture height.
@ CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_LAYERS
Maximum layers in a 1D layered surface.
@ CU_DEVICE_ATTRIBUTE_MAX
@ CU_DEVICE_ATTRIBUTE_MANAGED_MEMORY
Device can allocate managed memory on this system.
@ CU_DEVICE_ATTRIBUTE_COMPUTE_MODE
Compute mode (see CUcomputemode for details).
@ CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID
PCI device ID of the device.
@ CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_WIDTH
Maximum 2D texture width if CUDA_ARRAY3D_TEXTURE_GATHER is set.
@ CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT
Alignment requirement for textures.
@ CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD
Device is on a multi-GPU board.
@ CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_LAYERS
Maximum layers in a cubemap layered texture.
@ CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_HEIGHT
Maximum 2D linear texture height.
@ CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X
Maximum block dimension X.
@ CU_DEVICE_ATTRIBUTE_MAX_PITCH
Maximum pitch in bytes allowed by memory copies.
@ CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT
Number of asynchronous engines.
@ CU_DEVICE_ATTRIBUTE_SURFACE_ALIGNMENT
Alignment requirement for surfaces.
@ CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_WIDTH
Maximum cubemap layered texture width/height.
@ CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT
Pitch alignment requirement for textures.
@ CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_WIDTH
Maximum 2D texture width.
@ CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING
Device shares a unified address space with the host.
@ CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_LAYERS
Maximum layers in a 2D layered texture.
@ CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_WIDTH
Maximum 2D layered surface width.
@ CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH
Maximum 3D texture width.
@ CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH
Global memory bus width in bits.
@ CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR
Minor compute capability version number.
@ CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE
Size of L2 cache in bytes.
@ CU_DEVICE_ATTRIBUTE_WARP_SIZE
Warp size in threads.
@ CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR
Maximum shared memory available per multiprocessor in bytes.
@ CU_DEVICE_ATTRIBUTE_REGISTERS_PER_BLOCK
Deprecated, use CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK.
@ CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z
Maximum grid dimension Z.
@ CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_HEIGHT
Maximum 2D layered texture height.
@ CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z
Maximum block dimension Z.
@ CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_DEPTH
Maximum 3D surface depth.
@ CU_DEVICE_ATTRIBUTE_GPU_OVERLAP
Device can possibly copy memory and execute a kernel concurrently.
@ CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_WIDTH
Maximum cubemap texture width/height.
@ CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_NUMSLICES
Deprecated, use CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_LAYERS.
@ CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_WIDTH
Maximum cubemap layered surface width.
@ CU_DEVICE_ATTRIBUTE_INTEGRATED
Device is integrated with host memory.
@ CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_WIDTH
Maximum 1D surface width.
@ CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD_GROUP_ID
Unique id for a group of devices on the same multi-GPU board.
@ CU_DEVICE_ATTRIBUTE_CAN_TEX2D_GATHER
Deprecated, do not use.
@ CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK
Maximum shared memory available per block in bytes.
@ CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_LAYERS
Maximum layers in a cubemap layered surface.
@ CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_WIDTH
Deprecated, use CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_WIDTH.
@ CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_HEIGHT
Maximum 2D layered surface height.
@ CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR
Maximum number of 32-bit registers available per multiprocessor.
@ CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH
Maximum 3D texture depth.
@ CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK
Maximum number of 32-bit registers available per block.
@ CU_DEVICE_ATTRIBUTE_TCC_DRIVER
Device is using TCC driver model.
@ CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_WIDTH
Maximum 1D layered texture width.
@ CU_DEVICE_ATTRIBUTE_GLOBAL_L1_CACHE_SUPPORTED
Device supports caching globals in L1.
@ CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X
Maximum grid dimension X.
@ CU_DEVICE_ATTRIBUTE_STREAM_PRIORITIES_SUPPORTED
Device supports stream priorities.
@ CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK
Maximum number of threads per block.
@ CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_WIDTH
Maximum 2D layered texture width.
@ CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR
Major compute capability version number.
@ CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_LAYERS
Maximum layers in a 1D layered texture.
@ CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH
Maximum 1D texture width.
@ CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID
PCI domain ID of the device.
@ CU_DEVICE_ATTRIBUTE_PCI_BUS_ID
PCI bus ID of the device.
@ CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR
Maximum resident threads per multiprocessor.
@ CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_WIDTH
Maximum 1D layered surface width.
@ CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_WIDTH
Maximum mipmapped 2D texture width.
@ CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_HEIGHT
Maximum 2D texture height if CUDA_ARRAY3D_TEXTURE_GATHER is set.
@ CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT
Maximum 3D texture height.
@ CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_HEIGHT
Maximum 3D surface height.
@ CU_DEVICE_ATTRIBUTE_LOCAL_L1_CACHE_SUPPORTED
Device supports caching locals in L1.
@ CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_HEIGHT
Maximum 2D surface height.
@ CU_DEVICE_ATTRIBUTE_ECC_ENABLED
Device has ECC support enabled.
@ CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT
Number of multiprocessors on device.
@ CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_WIDTH
Maximum cubemap surface width.
@ CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT_ALTERNATE
Alternate maximum 3D texture height.
@ CU_DEVICE_ATTRIBUTE_SHARED_MEMORY_PER_BLOCK
Deprecated, use CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK.
@ CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_WIDTH
Maximum 3D surface width.
@ CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH_ALTERNATE
Alternate maximum 3D texture width.
@ CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_LAYERS
Maximum layers in a 2D layered surface.
@ CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY
Memory available on device for constant variables in a CUDA C kernel in bytes.
@ CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH_ALTERNATE
Alternate maximum 3D texture depth.
@ CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY
Device can map host memory into CUDA address space.
@ CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT
Specifies whether there is a run time limit on kernels.
@ CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y
Maximum block dimension Y.
This file defines the class FunctionDAG, which is our representation of a Halide pipeline,...
@ Internal
Not visible externally, similar to 'static' linkage in C.
size_t WidthInBytes
Width of 3D memory copy in bytes.
CUmemorytype srcMemoryType
Source memory type (host, device, array)
size_t Height
Height of 3D memory copy.
size_t srcPitch
Source pitch (ignored when src is array)
CUdeviceptr srcDevice
Source device pointer.
const void * srcHost
Source host pointer.
CUarray dstArray
Destination array reference.
size_t srcHeight
Source height (ignored when src is array; may be 0 if Depth==1)
CUdeviceptr dstDevice
Destination device pointer.
size_t dstHeight
Destination height (ignored when dst is array; may be 0 if Depth==1)
CUmemorytype dstMemoryType
Destination memory type (host, device, array)
size_t dstXInBytes
Destination X in bytes.
void * dstHost
Destination host pointer.
size_t dstZ
Destination Z.
void * reserved0
Must be NULL.
void * reserved1
Must be NULL.
size_t dstPitch
Destination pitch (ignored when dst is array)
CUarray srcArray
Source array reference.
size_t Depth
Depth of 3D memory copy.
size_t dstY
Destination Y.
size_t srcXInBytes
Source X in bytes.
size_t dstLOD
Destination LOD.