1#ifndef HALIDE_HALIDERUNTIME_H
2#define HALIDE_HALIDERUNTIME_H
4#ifndef COMPILING_HALIDE_RUNTIME
26#define HALIDE_VERSION_MAJOR 19
27#define HALIDE_VERSION_MINOR 0
28#define HALIDE_VERSION_PATCH 0
44#define HALIDE_ALWAYS_INLINE __forceinline
45#define HALIDE_NEVER_INLINE __declspec(noinline)
49#define HALIDE_ALWAYS_INLINE inline __attribute__((always_inline))
50#define HALIDE_NEVER_INLINE __attribute__((noinline))
53#ifndef HALIDE_MUST_USE_RESULT
55#if __has_attribute(nodiscard)
57#define HALIDE_MUST_USE_RESULT [[nodiscard]]
58#elif __has_attribute(warn_unused_result)
60#define HALIDE_MUST_USE_RESULT __attribute__((warn_unused_result))
62#define HALIDE_MUST_USE_RESULT
65#define HALIDE_MUST_USE_RESULT
75#ifndef HALIDE_FUNCTION_ATTRS
76#define HALIDE_FUNCTION_ATTRS
79#ifndef HALIDE_EXPORT_SYMBOL
81#define HALIDE_EXPORT_SYMBOL __declspec(dllexport)
83#define HALIDE_EXPORT_SYMBOL __attribute__((visibility("default")))
87#ifndef COMPILING_HALIDE_RUNTIME
91#if defined(__has_feature)
92#if __has_feature(address_sanitizer)
93#define HALIDE_RUNTIME_ASAN_DETECTED
97#if defined(__SANITIZE_ADDRESS__) && !defined(HALIDE_RUNTIME_ASAN_DETECTED)
98#define HALIDE_RUNTIME_ASAN_DETECTED
101#if !defined(HALIDE_RUNTIME_ASAN_DETECTED)
108#if defined(__clang__) && (__clang_major__ >= 15) && !defined(__EMSCRIPTEN__) && !defined(__i386__)
109#if defined(__is_identifier)
110#if !__is_identifier(_Float16)
111#define HALIDE_CPP_COMPILER_HAS_FLOAT16
119#if defined(__GNUC__) && (__GNUC__ >= 12)
120#if defined(__x86_64__) || (defined(__i386__) && (__GNUC__ >= 14) && defined(__SSE2__)) || ((defined(__arm__) || defined(__aarch64__)) && (__GNUC__ >= 13))
121#define HALIDE_CPP_COMPILER_HAS_FLOAT16
232 int min,
int size,
uint8_t *closure);
264 uint8_t *closure,
void *task_parent);
342 uint8_t *closure,
void *task_parent);
432typedef void *(*halide_malloc_t)(
void *,
size_t);
455typedef void *(*halide_get_symbol_t)(
const char *name);
456typedef void *(*halide_load_library_t)(
const char *name);
457typedef void *(*halide_get_library_symbol_t)(
void *lib,
const char *name);
477#if (__cplusplus >= 201103L || _MSVC_LANG >= 201103L)
490#ifndef HALIDE_ATTRIBUTE_ALIGN
492#define HALIDE_ATTRIBUTE_ALIGN(x) __declspec(align(x))
494#define HALIDE_ATTRIBUTE_ALIGN(x) __attribute__((aligned(x)))
505#if (__cplusplus >= 201103L || _MSVC_LANG >= 201103L)
521#if (__cplusplus >= 201103L || _MSVC_LANG >= 201103L)
545 return as_u32() == other.as_u32();
549 return !(*
this == other);
553 return as_u32() < other.as_u32();
558 return (
bits + 7) / 8;
573#if (__cplusplus >= 201103L || _MSVC_LANG >= 201103L)
697#if (__cplusplus >= 201103L || _MSVC_LANG >= 201103L)
702 return (
const int *)(
this + 1);
706 return (
int *)(
this + 1);
713 return (
const void *)(coordinates() +
dimensions);
734 const char *f = func();
894 int slice_dim,
int slice_pos,
1000 bool has_eviction_key,
uint64_t eviction_key);
1295 int min_bound,
int max_bound,
int min_required,
int max_required);
1301 int dimension,
int min_touched,
int max_touched,
1302 int min_valid,
int max_valid);
1310 int constrained_min,
int constrained_extent,
1311 int required_min,
int required_extent);
1313 const char *constrained_var,
int constrained_val);
1319 double val,
double min_val);
1325 double val,
double max_val);
1333 const char *loop_name);
1335 int dim,
int min,
int extent,
int valid_min,
int fold_factor);
1338 int fold_factor,
const char *loop_name,
int required_extent);
1347 int provided_size,
int required_size);
1509#if (__cplusplus >= 201103L || _MSVC_LANG >= 201103L)
1521 return (min == other.
min) &&
1528 return !(*
this == other);
1578#if (__cplusplus >= 201103L || _MSVC_LANG >= 201103L)
1582 return (
flags & flag) != 0;
1654 return host + begin_offset() *
type.bytes();
1660 return host + end_offset() *
type.bytes();
1665 return (
size_t)(end_offset() - begin_offset()) *
type.bytes();
1674 return host + index *
type.bytes();
1702#ifndef HALIDE_ATTRIBUTE_DEPRECATED
1703#ifdef HALIDE_ALLOW_DEPRECATED
1704#define HALIDE_ATTRIBUTE_DEPRECATED(x)
1707#define HALIDE_ATTRIBUTE_DEPRECATED(x) __declspec(deprecated(x))
1709#define HALIDE_ATTRIBUTE_DEPRECATED(x) __attribute__((deprecated(x)))
1796 static const int32_t VERSION = 1;
1830 int (*filter_argv_call)(
void **),
1832 const char *
const *extra_key_value_pairs);
1856 uint64_t active_threads_numerator, active_threads_denominator;
1882 uint64_t active_threads_numerator, active_threads_denominator;
1888 struct halide_profiler_func_stats *funcs;
1928 uint64_t active_threads_numerator, active_threads_denominator;
1932 struct halide_profiler_instance_state *next;
1937 struct halide_profiler_instance_state **prev_next;
1941 struct halide_profiler_pipeline_stats *pipeline_stats;
1944 struct halide_profiler_func_stats *funcs;
1961 int should_collect_statistics;
2108#if (__cplusplus >= 201103L || _MSVC_LANG >= 201103L)
2113struct check_is_pointer {
2114 static constexpr bool value =
false;
2118struct check_is_pointer<T *> {
2119 static constexpr bool value =
true;
2131 static_assert(check_is_pointer<T>::value,
"Expected a pointer type here");
2135#ifdef HALIDE_CPP_COMPILER_HAS_FLOAT16
2197#ifndef COMPILING_HALIDE_RUNTIME
2202namespace HalideFunctionInfo {
2204enum ArgumentKind { InputScalar = 0,
2208struct ArgumentInfo {
2209 std::string_view name;
void halide_set_custom_parallel_runtime(halide_do_par_for_t, halide_do_task_t, halide_do_loop_task_t, halide_do_parallel_tasks_t, halide_semaphore_init_t, halide_semaphore_try_acquire_t, halide_semaphore_release_t)
int halide_memoization_cache_lookup(void *user_context, const uint8_t *cache_key, int32_t size, struct halide_buffer_t *realized_bounds, int32_t tuple_count, struct halide_buffer_t **tuple_buffers)
Given a cache key for a memoized result, currently constructed from the Func name and top-level Func ...
int halide_error_bad_extern_fold(void *user_context, const char *func_name, int dim, int min, int extent, int valid_min, int fold_factor)
int halide_device_sync(void *user_context, struct halide_buffer_t *buf)
Wait for current GPU operations to complete.
int halide_default_do_task(void *user_context, halide_task_t f, int idx, uint8_t *closure)
int halide_do_par_for(void *user_context, halide_task_t task, int min, int size, uint8_t *closure)
void *(* halide_load_library_t)(const char *name)
int halide_error_bad_fold(void *user_context, const char *func_name, const char *var_name, const char *loop_name)
int(* halide_semaphore_release_t)(struct halide_semaphore_t *, int)
void halide_profiler_lock(struct halide_profiler_state *)
These routines are called to temporarily disable and then reenable the profiler.
void halide_cond_signal(struct halide_cond *cond)
void * halide_default_get_library_symbol(void *lib, const char *name)
int halide_do_loop_task(void *user_context, halide_loop_task_t f, int min, int extent, uint8_t *closure, void *task_parent)
halide_load_library_t halide_set_custom_load_library(halide_load_library_t user_load_library)
int halide_device_crop(void *user_context, const struct halide_buffer_t *src, struct halide_buffer_t *dst)
Give the destination buffer a device allocation which is an alias for the same coordinate range in th...
int halide_semaphore_init(struct halide_semaphore_t *, int n)
halide_get_symbol_t halide_set_custom_get_symbol(halide_get_symbol_t user_get_symbol)
double halide_float16_bits_to_double(uint16_t)
Read bits representing a half precision floating point number and return the double that represents t...
void * halide_malloc(void *user_context, size_t x)
Halide calls these functions to allocate and free memory.
void *(* halide_get_library_symbol_t)(void *lib, const char *name)
int halide_msan_annotate_buffer_is_initialized(void *user_context, struct halide_buffer_t *buffer)
Mark the data pointed to by the halide_buffer_t as initialized (but not the halide_buffer_t itself),...
void halide_default_print(void *user_context, const char *)
halide_target_feature_t
Optional features a compilation Target can have.
@ halide_target_feature_large_buffers
Enable 64-bit buffer indexing to support buffers > 2GB. Ignored if bits != 64.
@ halide_target_feature_fma
Enable x86 FMA instruction.
@ halide_target_feature_wasm_bulk_memory
Enable +bulk-memory instructions for WebAssembly codegen.
@ halide_target_feature_tsan
Enable hooks for TSAN support.
@ halide_target_feature_msan
Enable hooks for MSAN support.
@ halide_target_feature_avx512_zen4
Enable the AVX512 features supported by Zen4 processors. This includes all of the Cannonlake features,...
@ halide_target_feature_wasm_threads
Enable use of threads in WebAssembly codegen. Requires the use of a wasm runtime that provides pthrea...
@ halide_target_feature_trace_loads
Trace all loads done by the pipeline. Equivalent to calling Func::trace_loads on every non-inlined Fu...
@ halide_target_feature_enable_llvm_loop_opt
Enable loop vectorization + unrolling in LLVM. Overrides halide_target_feature_disable_llvm_loop_opt....
@ halide_target_feature_no_asserts
Disable all runtime checks, for slightly tighter code.
@ halide_target_feature_cl_doubles
Enable double support on OpenCL targets.
@ halide_target_feature_rvv
Enable RISCV "V" Vector Extension.
@ halide_target_feature_avx2
Use AVX 2 instructions. Only relevant on x86.
@ halide_target_feature_trace_realizations
Trace all realizations done by the pipeline. Equivalent to calling Func::trace_realizations on every ...
@ halide_target_feature_c_plus_plus_mangling
Generate C++ mangled names for result function, et al.
@ halide_target_feature_vulkan_float16
Enable Vulkan 16-bit float support.
@ halide_target_feature_no_runtime
Do not include a copy of the Halide runtime in any generated object file or assembly.
@ halide_target_feature_hvx_v65
Enable Hexagon v65 architecture.
@ halide_target_feature_debug
Turn on debug info and output for runtime code.
@ halide_target_feature_embed_bitcode
Emulate clang -fembed-bitcode flag.
@ halide_target_feature_armv86a
Enable ARMv8.6a instructions.
@ halide_target_feature_wasm_simd128
Enable +simd128 instructions for WebAssembly codegen.
@ halide_target_feature_vulkan
Enable Vulkan runtime support.
@ halide_target_feature_end
A sentinel. Every target is considered to have this feature, and setting this feature does nothing.
@ halide_llvm_large_code_model
Use the LLVM large code model to compile.
@ halide_target_feature_profile_by_timer
Alternative to halide_target_feature_profile using timer interrupt for systems without threads or app...
@ halide_target_feature_semihosting
Used together with Target::NoOS for the baremetal target built with semihosting library and run with ...
@ halide_target_feature_soft_float_abi
Enable soft float ABI. This only enables the soft float ABI calling convention, which does not necess...
@ halide_target_feature_sve2
Enable ARM Scalable Vector Extensions v2.
@ halide_target_feature_d3d12compute
Enable Direct3D 12 Compute runtime.
@ halide_target_feature_cuda_capability86
Enable CUDA compute capability 8.6 (Ampere)
@ halide_target_feature_armv89a
Enable ARMv8.9a instructions.
@ halide_target_feature_avx512_skylake
Enable the AVX512 features supported by Skylake Xeon server processors. This adds AVX512-VL,...
@ halide_target_feature_avx512_cannonlake
Enable the AVX512 features expected to be supported by future Cannonlake processors....
@ halide_target_feature_metal
Enable the (Apple) Metal runtime.
@ halide_target_feature_hvx_128
Enable HVX 128 byte mode.
@ halide_target_feature_cuda_capability70
Enable CUDA compute capability 7.0 (Volta)
@ halide_target_feature_fma4
Enable x86 (AMD) FMA4 instruction set.
@ halide_target_feature_cuda_capability30
Enable CUDA compute capability 3.0 (Kepler)
@ halide_target_feature_no_neon
Avoid using NEON instructions. Only relevant for 32-bit ARM.
@ halide_target_feature_cuda_capability61
Enable CUDA compute capability 6.1 (Pascal)
@ halide_target_feature_armv7s
Generate code for ARMv7s. Only relevant for 32-bit ARM.
@ halide_target_feature_spirv
Enable SPIR-V code generation support.
@ halide_target_feature_trace_pipeline
Trace the pipeline.
@ halide_target_feature_armv88a
Enable ARMv8.8a instructions.
@ halide_target_feature_cl_atomic64
Enable 64-bit atomic operations on OpenCL targets.
@ halide_target_feature_egl
Force use of EGL support.
@ halide_target_feature_hvx_v68
Enable Hexagon v68 architecture.
@ halide_target_feature_avx10_1
Intel AVX10 version 1 support. vector_bits is used to indicate width.
@ halide_target_feature_profile
Launch a sampling profiler alongside the Halide pipeline that monitors and reports the runtime used b...
@ halide_target_feature_strict_float
Turn off all non-IEEE floating-point optimization. Currently applies only to LLVM targets.
@ halide_target_feature_cuda_capability35
Enable CUDA compute capability 3.5 (Kepler)
@ halide_target_feature_armv8a
Enable ARMv8a instructions.
@ halide_target_feature_asan
Enable hooks for ASAN support.
@ halide_target_feature_armv87a
Enable ARMv8.7a instructions.
@ halide_target_feature_cl_half
Enable half support on OpenCL targets.
@ halide_target_feature_vulkan_float64
Enable Vulkan 64-bit float support.
@ halide_target_feature_arm_dot_prod
Enable ARMv8.2-a dotprod extension (i.e. udot and sdot instructions)
@ halide_target_feature_avx512_sapphirerapids
Enable the AVX512 features supported by Sapphire Rapids processors. This includes all of the Zen4 feat...
@ halide_target_feature_vulkan_version13
Enable Vulkan v1.3 runtime target support.
@ halide_target_feature_vulkan_version12
Enable Vulkan v1.2 runtime target support.
@ halide_target_feature_sse41
Use SSE 4.1 and earlier instructions. Only relevant on x86.
@ halide_target_feature_power_arch_2_07
Use POWER ISA 2.07 new instructions. Only relevant on POWERPC.
@ halide_target_feature_opencl
Enable the OpenCL runtime.
@ halide_target_feature_trace_stores
Trace all stores done by the pipeline. Equivalent to calling Func::trace_stores on every non-inlined ...
@ halide_target_feature_hexagon_dma
Enable Hexagon DMA buffers.
@ halide_target_feature_avx512
Enable the base AVX512 subset supported by all AVX512 architectures. The specific feature sets are AV...
@ halide_target_feature_avx512_knl
Enable the AVX512 features supported by Knight's Landing chips, such as the Xeon Phi x200....
@ halide_target_feature_cuda_capability50
Enable CUDA compute capability 5.0 (Maxwell)
@ halide_target_feature_arm_fp16
Enable ARMv8.2-a half-precision floating point data processing.
@ halide_target_feature_armv82a
Enable ARMv8.2a instructions.
@ halide_target_feature_hvx_v62
Enable Hexagon v62 architecture.
@ halide_target_feature_armv84a
Enable ARMv8.4a instructions.
@ halide_target_feature_cuda
Enable the CUDA runtime. Defaults to compute capability 2.0 (Fermi)
@ halide_target_feature_armv81a
Enable ARMv8.1a instructions.
@ halide_target_feature_webgpu
Enable the WebGPU runtime.
@ halide_target_feature_sanitizer_coverage
Enable hooks for SanitizerCoverage support.
@ halide_target_feature_cuda_capability80
Enable CUDA compute capability 8.0 (Ampere)
@ halide_target_feature_wasm_mvponly
Disable all extensions to WebAssembly codegen (including +sign-ext and +nontrapping-fptoint,...
@ halide_target_feature_f16c
Enable x86 16-bit float support.
@ halide_target_feature_vulkan_int16
Enable Vulkan 16-bit integer support.
@ halide_target_feature_cuda_capability32
Enable CUDA compute capability 3.2 (Tegra K1)
@ halide_target_feature_armv85a
Enable ARMv8.5a instructions.
@ halide_target_feature_jit
Generate code that will run immediately inside the calling process.
@ halide_target_feature_avx
Use AVX 1 instructions. Only relevant on x86.
@ halide_target_feature_cuda_capability75
Enable CUDA compute capability 7.5 (Turing)
@ halide_target_feature_check_unsafe_promises
Insert assertions for promises.
@ halide_target_feature_vsx
Use VSX instructions. Only relevant on POWERPC.
@ halide_target_feature_vulkan_int8
Enable Vulkan 8-bit integer support.
@ halide_target_feature_armv83a
Enable ARMv8.3a instructions.
@ halide_target_feature_vulkan_int64
Enable Vulkan 64-bit integer support.
@ halide_target_feature_user_context
Generated code takes a user_context pointer as first argument.
@ halide_target_feature_no_bounds_query
Disable the bounds querying functionality.
@ halide_target_feature_vulkan_version10
Enable Vulkan v1.0 runtime target support.
@ halide_target_feature_fuzz_float_stores
On every floating point store, set the last bit of the mantissa to zero. Pipelines for which the outp...
@ halide_target_feature_sve
Enable ARM Scalable Vector Extensions.
@ halide_target_feature_x86_apx
Intel x86 APX support. Covers initial set of features released as APX: egpr,push2pop2,...
@ halide_target_feature_hvx_v66
Enable Hexagon v66 architecture.
void halide_free(void *user_context, void *ptr)
bool halide_semaphore_try_acquire(struct halide_semaphore_t *, int n)
@ halide_buffer_flag_device_dirty
@ halide_buffer_flag_host_dirty
bool halide_default_semaphore_try_acquire(struct halide_semaphore_t *, int n)
int halide_error_buffer_allocation_too_large(void *user_context, const char *buffer_name, uint64_t allocation_size, uint64_t max_size)
void halide_cond_wait(struct halide_cond *cond, struct halide_mutex *mutex)
int(* halide_do_par_for_t)(void *, halide_task_t, int, int, uint8_t *)
Set a custom method for performing a parallel for loop.
int halide_set_num_threads(int n)
int halide_copy_to_host(void *user_context, struct halide_buffer_t *buf)
Copy image data from device memory to host memory.
int halide_default_do_par_for(void *user_context, halide_task_t task, int min, int size, uint8_t *closure)
The default versions of the parallel runtime functions.
int halide_msan_annotate_memory_is_initialized(void *user_context, const void *ptr, uint64_t len)
Annotate that a given range of memory has been initialized; only used when Target::MSAN is enabled.
struct halide_profiler_state * halide_profiler_get_state(void)
Get a pointer to the global profiler state for programmatic inspection.
int halide_error_bad_dimensions(void *user_context, const char *func_name, int32_t dimensions_given, int32_t correct_dimensions)
int halide_mutex_array_unlock(struct halide_mutex_array *array, int entry)
int halide_error_constraint_violated(void *user_context, const char *var, int val, const char *constrained_var, int constrained_val)
int halide_default_do_loop_task(void *user_context, halide_loop_task_t f, int min, int extent, uint8_t *closure, void *task_parent)
void halide_profiler_reset(void)
Reset profiler state cheaply.
int(* halide_task_t)(void *user_context, int task_number, uint8_t *closure)
Define halide_do_par_for to replace the default thread pool implementation.
void halide_mutex_lock(struct halide_mutex *mutex)
A basic set of mutex and condition variable functions, which call platform specific code for mutual e...
halide_trace_event_code_t
@ halide_trace_begin_pipeline
@ halide_trace_end_pipeline
@ halide_trace_end_produce
@ halide_trace_end_consume
@ halide_trace_end_realization
@ halide_trace_begin_realization
struct halide_profiler_pipeline_stats * halide_profiler_get_pipeline_state(const char *pipeline_name)
Get a pointer to the pipeline state associated with pipeline_name.
int halide_do_task(void *user_context, halide_task_t f, int idx, uint8_t *closure)
halide_malloc_t halide_set_custom_malloc(halide_malloc_t user_malloc)
int halide_error_device_dirty_with_no_device_support(void *user_context, const char *buffer_name)
int halide_default_semaphore_init(struct halide_semaphore_t *, int n)
void halide_msan_annotate_buffer_is_initialized_as_destructor(void *user_context, void *buffer)
void * halide_get_library_symbol(void *lib, const char *name)
void(* halide_error_handler_t)(void *, const char *)
void halide_device_release(void *user_context, const struct halide_device_interface_t *device_interface)
Release all data associated with the given device interface, in particular all resources (memory,...
int halide_error_bounds_inference_call_failed(void *user_context, const char *extern_stage_name, int result)
Halide calls the functions below on various error conditions.
int halide_error_buffer_extents_negative(void *user_context, const char *buffer_name, int dimension, int extent)
int halide_error_buffer_is_null(void *user_context, const char *routine)
void halide_mutex_unlock(struct halide_mutex *mutex)
int halide_error_constraints_make_required_region_smaller(void *user_context, const char *buffer_name, int dimension, int constrained_min, int constrained_extent, int required_min, int required_extent)
int halide_error_out_of_memory(void *user_context)
void * halide_get_symbol(const char *name)
Halide calls these functions to interact with the underlying system runtime functions.
int32_t halide_debug_to_file(void *user_context, const char *filename, struct halide_buffer_t *buf)
Called when debug_to_file is used inside Halide code.
int halide_error_no_device_interface(void *user_context)
struct halide_thread * halide_spawn_thread(void(*f)(void *), void *closure)
Spawn a thread.
int halide_error_debug_to_file_failed(void *user_context, const char *func, const char *filename, int error_code)
struct halide_dimension_t halide_dimension_t
int halide_error_requirement_failed(void *user_context, const char *condition, const char *message)
void halide_memoization_cache_release(void *user_context, void *host)
If halide_memoization_cache_lookup succeeds, halide_memoization_cache_release must be called to signa...
int(* halide_can_use_target_features_t)(int count, const uint64_t *features)
void halide_register_argv_and_metadata(int(*filter_argv_call)(void **), const struct halide_filter_metadata_t *filter_metadata, const char *const *extra_key_value_pairs)
halide_register_argv_and_metadata() is a user-defined function that must be provided in order to use ...
void *(* halide_get_symbol_t)(const char *name)
int halide_error_param_too_large_f64(void *user_context, const char *param_name, double val, double max_val)
int halide_msan_check_memory_is_initialized(void *user_context, const void *ptr, uint64_t len, const char *name)
Verify that a given range of memory has been initialized; only used when Target::MSAN is enabled.
int(* halide_do_loop_task_t)(void *, halide_loop_task_t, int, int, uint8_t *, void *)
The version of do_task called for loop tasks.
int halide_error_buffer_extents_too_large(void *user_context, const char *buffer_name, int64_t actual_size, int64_t max_size)
int32_t halide_trace(void *user_context, const struct halide_trace_event_t *event)
Called when Funcs are marked as trace_load, trace_store, or trace_realization.
int halide_error_extern_stage_failed(void *user_context, const char *extern_stage_name, int result)
A call to an extern stage failed.
halide_can_use_target_features_t halide_set_custom_can_use_target_features(halide_can_use_target_features_t)
int halide_error_host_is_null(void *user_context, const char *func_name)
void halide_set_trace_file(int fd)
Set the file descriptor that Halide should write binary trace events to.
int halide_memoization_cache_store(void *user_context, const uint8_t *cache_key, int32_t size, struct halide_buffer_t *realized_bounds, int32_t tuple_count, struct halide_buffer_t **tuple_buffers, bool has_eviction_key, uint64_t eviction_key)
Given a cache key for a memoized result, currently constructed from the Func name and top-level Func ...
int halide_buffer_copy(void *user_context, struct halide_buffer_t *src, const struct halide_device_interface_t *dst_device_interface, struct halide_buffer_t *dst)
Copy data from one buffer to another.
int halide_error_vscale_invalid(void *user_context, const char *func_name, int runtime_vscale, int compiletime_vscale)
int halide_error_fold_factor_too_small(void *user_context, const char *func_name, const char *var_name, int fold_factor, const char *loop_name, int required_extent)
int halide_error_param_too_small_f64(void *user_context, const char *param_name, double val, double min_val)
int halide_error_param_too_large_i64(void *user_context, const char *param_name, int64_t val, int64_t max_val)
int halide_error_param_too_small_u64(void *user_context, const char *param_name, uint64_t val, uint64_t min_val)
void(* halide_print_t)(void *, const char *)
halide_trace_t halide_set_custom_trace(halide_trace_t trace)
void halide_print(void *user_context, const char *)
Print a message to stderr.
bool(* halide_semaphore_try_acquire_t)(struct halide_semaphore_t *, int)
void halide_profiler_report(void *user_context)
Print out timing statistics for everything run since the last reset.
void halide_set_gpu_device(int n)
Selects which gpu device to use.
void halide_mutex_array_destroy(void *user_context, void *array)
int32_t halide_default_trace(void *user_context, const struct halide_trace_event_t *event)
int halide_default_can_use_target_features(int count, const uint64_t *features)
This is the default implementation of halide_can_use_target_features; it is provided for convenience ...
int halide_reuse_device_allocations(void *user_context, bool)
Tell Halide whether or not it is permitted to hold onto device allocations to service future requests...
int halide_error_param_too_small_i64(void *user_context, const char *param_name, int64_t val, int64_t min_val)
halide_type_code_t
Types in the halide type system.
@ halide_type_float
IEEE floating point numbers.
@ halide_type_handle
opaque pointer type (void *)
@ halide_type_bfloat
floating point numbers in the bfloat format
@ halide_type_int
signed integers
@ halide_type_uint
unsigned integers
int halide_device_malloc(void *user_context, struct halide_buffer_t *buf, const struct halide_device_interface_t *device_interface)
Allocate device memory to back a halide_buffer_t.
bool halide_can_reuse_device_allocations(void *user_context)
Determines whether on device_free the memory is returned immediately to the device API,...
int halide_mutex_array_lock(struct halide_mutex_array *array, int entry)
void(* halide_free_t)(void *, void *)
int halide_error_storage_bound_too_small(void *user_context, const char *func_name, const char *var_name, int provided_size, int required_size)
int(* halide_loop_task_t)(void *user_context, int min, int extent, uint8_t *closure, void *task_parent)
A task representing a serial for loop evaluated over some range.
void halide_profiler_shutdown(void)
Reset all profiler state.
int halide_error_specialize_fail(void *user_context, const char *message)
int halide_error_device_interface_no_device(void *user_context)
int halide_error_host_and_device_dirty(void *user_context)
int halide_error_access_out_of_bounds(void *user_context, const char *func_name, int dimension, int min_touched, int max_touched, int min_valid, int max_valid)
int halide_error_explicit_bounds_too_small(void *user_context, const char *func_name, const char *var_name, int min_bound, int max_bound, int min_required, int max_required)
Various other error conditions.
void * halide_default_malloc(void *user_context, size_t x)
void * halide_default_load_library(const char *name)
int halide_error_buffer_argument_is_null(void *user_context, const char *buffer_name)
int halide_default_semaphore_release(struct halide_semaphore_t *, int n)
void halide_default_error(void *user_context, const char *)
int halide_device_slice(void *user_context, const struct halide_buffer_t *src, int slice_dim, int slice_pos, struct halide_buffer_t *dst)
Give the destination buffer a device allocation which is an alias for a similar coordinate range in t...
int halide_error_device_crop_failed(void *user_context)
int halide_get_num_threads()
Get or set the number of threads used by Halide's thread pool.
void halide_default_free(void *user_context, void *ptr)
void halide_shutdown_thread_pool(void)
#define HALIDE_MUST_USE_RESULT
void halide_memoization_cache_evict(void *user_context, uint64_t eviction_key)
Evict all cache entries that were tagged with the given eviction_key in the memoize scheduling direct...
halide_do_par_for_t halide_set_custom_do_par_for(halide_do_par_for_t do_par_for)
void halide_join_thread(struct halide_thread *)
Join a thread.
halide_error_code_t
The error codes that may be returned by a Halide pipeline.
@ halide_error_code_no_device_interface
Buffer has a non-zero device but no device interface, which violates a Halide invariant.
@ halide_error_code_symbol_not_found
A runtime symbol could not be loaded.
@ halide_error_code_bad_fold
A fold_storage directive was used on a dimension that is not accessed in a monotonically increasing o...
@ halide_error_code_fold_factor_too_small
A fold_storage directive was used with a fold factor that was too small to store all the values of a ...
@ halide_error_code_split_factor_not_positive
A factor used to split a loop was discovered to be zero or negative at runtime.
@ halide_error_code_device_interface_no_device
Buffer has a non-null device_interface but device is 0, which violates a Halide invariant.
@ halide_error_code_param_too_large
A scalar parameter passed in was greater than its maximum declared value.
@ halide_error_code_param_too_small
A scalar parameter passed in was smaller than its minimum declared value.
@ halide_error_code_access_out_of_bounds
A pipeline would access memory outside of the halide_buffer_t passed in.
@ halide_error_code_specialize_fail
A specialize_fail() schedule branch was selected at runtime.
@ halide_error_code_unimplemented
This part of the Halide runtime is unimplemented on this platform.
@ halide_error_code_requirement_failed
User-specified require() expression was not satisfied.
@ halide_error_code_bad_extern_fold
A folded buffer was passed to an extern stage, but the region touched wraps around the fold boundary.
@ halide_error_code_incompatible_device_interface
An operation on a buffer required an allocation on a particular device interface, but a device alloca...
@ halide_error_code_internal_error
There is a bug in the Halide compiler.
@ halide_error_code_buffer_extents_negative
At least one of the buffer's extents is negative.
@ halide_error_code_constraints_make_required_region_smaller
Applying explicit constraints on the size of an input or output buffer shrank the size of that buffer...
@ halide_error_code_copy_to_device_failed
The Halide runtime encountered an error while trying to copy from host to device.
@ halide_error_code_vscale_invalid
The "vscale" value of the scalable vector detected at runtime does not match the vscale value used in compila...
@ halide_error_code_generic_error
An uncategorized error occurred.
@ halide_error_code_device_crop_failed
Cropping/slicing a buffer failed for some other reason.
@ halide_error_code_success
There was no error.
@ halide_error_code_copy_to_host_failed
The Halide runtime encountered an error while trying to copy from device to host.
@ halide_error_code_trace_failed
Failure recording trace packets for one of the halide_target_feature_trace features.
@ halide_error_code_device_sync_failed
The Halide runtime encountered an error while trying to synchronize with a device.
@ halide_error_code_buffer_argument_is_null
A halide_buffer_t pointer passed in was NULL.
@ halide_error_code_bad_dimensions
The dimensions field of a halide_buffer_t does not match the dimensions of that ImageParam.
@ halide_error_code_device_malloc_failed
The Halide runtime encountered an error while trying to allocate memory on device.
@ halide_error_code_host_and_device_dirty
Buffer has both host and device dirty bits set, which violates a Halide invariant.
@ halide_error_code_debug_to_file_failed
debug_to_file failed to open or write to the specified file.
@ halide_error_code_gpu_device_error
Call(s) to a GPU backend API failed.
@ halide_error_code_buffer_is_null
The halide_buffer_t * passed to a halide runtime routine is nullptr and this is not allowed.
@ halide_error_code_device_crop_unsupported
Attempted to make cropped/sliced alias of a buffer with a device field, but the device_interface does...
@ halide_error_code_device_buffer_copy_failed
The Halide runtime encountered an error while trying to copy from one buffer to another.
@ halide_error_code_device_free_failed
The Halide runtime encountered an error while trying to free a device allocation.
@ halide_error_code_buffer_allocation_too_large
A halide_buffer_t was given that spans more than 2GB of memory.
@ halide_error_code_bad_type
The elem_size field of a halide_buffer_t does not match the size in bytes of the type of that ImagePa...
@ halide_error_code_device_run_failed
The Halide runtime encountered an error while trying to launch a GPU kernel.
@ halide_error_code_device_dirty_with_no_device_support
A buffer with the device_dirty flag set was passed to a pipeline compiled with no device backends ena...
@ halide_error_code_cannot_profile_pipeline
Profiling failed for a pipeline invocation.
@ halide_error_code_explicit_bounds_too_small
A Func was given an explicit bound via Func::bound, but this was not large enough to encompass the re...
@ halide_error_code_buffer_extents_too_large
A halide_buffer_t was given with extents that multiply to a number greater than 2^31-1.
@ halide_error_code_device_detach_native_failed
The Halide runtime encountered an error while trying to detach a native device handle.
@ halide_error_code_storage_bound_too_small
An explicit storage bound provided is too small to store all the values produced by the function.
@ halide_error_code_out_of_memory
A call to halide_malloc returned NULL.
@ halide_error_code_device_wrap_native_failed
The Halide runtime encountered an error while trying to wrap a native device handle.
@ halide_error_code_constraint_violated
A constraint on a size or stride of an input or output buffer was not met by the halide_buffer_t pass...
@ halide_error_code_unaligned_host_ptr
The Halide runtime encountered a host pointer that violated the alignment set for it by way of a call...
@ halide_error_code_host_is_null
The host field on an input or output was null, the device field was not zero, and the pipeline tries ...
void * halide_load_library(const char *name)
void halide_memoization_cache_set_size(int64_t size)
Set the soft maximum amount of memory, in bytes, that the LRU cache will use to memoize Func results.
#define HALIDE_ALWAYS_INLINE
void halide_cond_broadcast(struct halide_cond *cond)
int halide_device_free(void *user_context, struct halide_buffer_t *buf)
Free device memory.
void halide_profiler_unlock(struct halide_profiler_state *)
void halide_memoization_cache_cleanup(void)
Free all memory and resources associated with the memoization cache.
int halide_device_sync_global(void *user_context, const struct halide_device_interface_t *device_interface)
Wait for current GPU operations to complete.
int halide_error_param_too_large_u64(void *user_context, const char *param_name, uint64_t val, uint64_t max_val)
int32_t(* halide_trace_t)(void *user_context, const struct halide_trace_event_t *)
int(* halide_do_task_t)(void *, halide_task_t, int, uint8_t *)
If you use the default do_par_for, you can still set a custom handler to perform each individual task...
halide_free_t halide_set_custom_free(halide_free_t user_free)
int halide_default_do_parallel_tasks(void *user_context, int num_tasks, struct halide_parallel_task_t *tasks, void *task_parent)
struct halide_mutex_array * halide_mutex_array_create(uint64_t sz)
int halide_get_gpu_device(void *user_context)
Halide calls this to get the desired halide gpu device setting.
int(* halide_semaphore_init_t)(struct halide_semaphore_t *, int)
halide_do_loop_task_t halide_set_custom_do_loop_task(halide_do_loop_task_t do_task)
int halide_device_detach_native(void *user_context, struct halide_buffer_t *buf)
void * halide_default_get_symbol(const char *name)
int halide_copy_to_device(void *user_context, struct halide_buffer_t *buf, const struct halide_device_interface_t *device_interface)
Copy image data from host memory to device memory.
halide_get_library_symbol_t halide_set_custom_get_library_symbol(halide_get_library_symbol_t user_get_library_symbol)
void halide_error(void *user_context, const char *)
Halide calls this function on runtime errors (for example bounds checking failures).
int halide_device_release_crop(void *user_context, struct halide_buffer_t *buf)
Release any resources associated with a cropped/sliced view of another buffer.
int halide_can_use_target_features(int count, const uint64_t *features)
This function is called internally by Halide in some situations to determine if the current execution...
int halide_error_split_factor_not_positive(void *user_context, const char *func_name, const char *orig, const char *outer, const char *inner, const char *factor_str, int factor)
int halide_error_bad_type(void *user_context, const char *func_name, uint32_t type_given, uint32_t correct_type)
halide_do_task_t halide_set_custom_do_task(halide_do_task_t do_task)
int halide_do_parallel_tasks(void *user_context, int num_tasks, struct halide_parallel_task_t *tasks, void *task_parent)
Enqueue some number of the tasks described above and wait for them to complete.
void *(* halide_malloc_t)(void *, size_t)
int halide_device_wrap_native(void *user_context, struct halide_buffer_t *buf, uint64_t handle, const struct halide_device_interface_t *device_interface)
Wrap or detach a native device handle, setting the device field and device_interface field as appropr...
int halide_get_trace_file(void *user_context)
Halide calls this to retrieve the file descriptor to write binary trace events to.
int halide_shutdown_trace(void)
If tracing is writing to a file, this call closes that file (flushing the trace).
float halide_float16_bits_to_float(uint16_t)
Read bits representing a half precision floating point number and return the float that represents th...
int halide_error_unaligned_host_ptr(void *user_context, const char *func_name, int alignment)
int halide_profiler_sample(struct halide_profiler_state *s, uint64_t *prev_t)
Collects profiling information.
void halide_register_device_allocation_pool(struct halide_device_allocation_pool *)
Register a callback to be informed when halide_reuse_device_allocations(false) is called,...
#define HALIDE_ATTRIBUTE_ALIGN(x)
int halide_msan_check_buffer_is_initialized(void *user_context, struct halide_buffer_t *buffer, const char *buf_name)
Verify that the data pointed to by the halide_buffer_t is initialized (but not the halide_buffer_t it...
int(* halide_do_parallel_tasks_t)(void *, int, struct halide_parallel_task_t *, void *task_parent)
Provide an entire custom tasking runtime via function pointers.
@ halide_argument_kind_output_buffer
@ halide_argument_kind_input_scalar
@ halide_argument_kind_input_buffer
int halide_semaphore_release(struct halide_semaphore_t *, int n)
struct halide_buffer_t halide_buffer_t
The raw representation of an image passed around by generated Halide code.
auto begin(reverse_adaptor< T > i)
bool operator<(const ConstantInterval &a, const ConstantInterval &b)
Expr with_lanes(const Expr &x, int lanes)
Rewrite the expression x to have `lanes` lanes.
auto end(reverse_adaptor< T > i)
auto operator==(const Other &a, const GeneratorParam< T > &b) -> decltype(a==(T) b)
Equality comparison between GeneratorParam<T> and any type that supports operator== with T.
auto operator!=(const Other &a, const GeneratorParam< T > &b) -> decltype(a !=(T) b)
Inequality comparison between GeneratorParam<T> and any type that supports operator!...
unsigned __INT64_TYPE__ uint64_t
signed __INT64_TYPE__ int64_t
__UINTPTR_TYPE__ uintptr_t
signed __INT32_TYPE__ int32_t
unsigned __INT8_TYPE__ uint8_t
__PTRDIFF_TYPE__ ptrdiff_t
WEAK void(*)(void *, const char *) halide_set_custom_print(void(*print)(void *, const char *))
unsigned __INT16_TYPE__ uint16_t
unsigned __INT32_TYPE__ uint32_t
signed __INT16_TYPE__ int16_t
signed __INT8_TYPE__ int8_t
WEAK void(*)(void *, const char *) halide_set_error_handler(void(*handler)(void *, const char *))
The raw representation of an image passed around by generated Halide code.
void * padding
Pads the buffer up to a multiple of 8 bytes.
int32_t dimensions
The dimensionality of the buffer.
halide_dimension_t * dim
The shape of the buffer.
uint64_t device
A device-handle for e.g. GPU memory used to back this buffer.
uint8_t * host
A pointer to the start of the data in main memory.
struct halide_type_t type
The type of each buffer element.
const struct halide_device_interface_t * device_interface
The interface used to interpret the above handle.
uint64_t flags
flags with various meanings.
Cross platform condition variable.
struct halide_device_allocation_pool * next
int(* release_unused)(void *user_context)
Each GPU API provides a halide_device_interface_t struct pointing to the code that manages device all...
int(* device_slice)(void *user_context, const struct halide_buffer_t *src, int slice_dim, int slice_pos, struct halide_buffer_t *dst)
int(* device_and_host_malloc)(void *user_context, struct halide_buffer_t *buf, const struct halide_device_interface_t *device_interface)
const struct halide_device_interface_impl_t * impl
int(* wrap_native)(void *user_context, struct halide_buffer_t *buf, uint64_t handle, const struct halide_device_interface_t *device_interface)
int(* compute_capability)(void *user_context, int *major, int *minor)
int(* device_release_crop)(void *user_context, struct halide_buffer_t *buf)
int(* device_crop)(void *user_context, const struct halide_buffer_t *src, struct halide_buffer_t *dst)
void(* device_release)(void *user_context, const struct halide_device_interface_t *device_interface)
int(* copy_to_host)(void *user_context, struct halide_buffer_t *buf)
int(* copy_to_device)(void *user_context, struct halide_buffer_t *buf, const struct halide_device_interface_t *device_interface)
int(* device_free)(void *user_context, struct halide_buffer_t *buf)
int(* device_sync)(void *user_context, struct halide_buffer_t *buf)
int(* detach_native)(void *user_context, struct halide_buffer_t *buf)
int(* device_and_host_free)(void *user_context, struct halide_buffer_t *buf)
int(* device_malloc)(void *user_context, struct halide_buffer_t *buf, const struct halide_device_interface_t *device_interface)
int(* buffer_copy)(void *user_context, struct halide_buffer_t *src, const struct halide_device_interface_t *dst_device_interface, struct halide_buffer_t *dst)
Obsolete version of halide_filter_argument_t; only present in code that wrote halide_filter_metadata_...
const struct halide_scalar_value_t * min
const struct halide_scalar_value_t * def
const struct halide_scalar_value_t * max
struct halide_type_t type
halide_filter_argument_t is essentially a plain-C-struct equivalent to Halide::Argument; most user co...
const struct halide_scalar_value_t * scalar_estimate
const struct halide_scalar_value_t * scalar_max
int64_t const *const * buffer_estimates
const struct halide_scalar_value_t * scalar_def
struct halide_type_t type
const struct halide_scalar_value_t * scalar_min
A type traits template to provide a halide_handle_cplusplus_type value from a C++ type.
struct halide_mutex * array
A parallel task to be passed to halide_do_parallel_tasks.
struct halide_semaphore_acquire_t * semaphores
The global state of the profiler.
void(* get_remote_profiler_state)(int *func, int *active_workers)
If this callback is defined, the profiler asserts that there is a single live instance,...
struct halide_thread * sampling_thread
Retrieve remote profiler state.
int sleep_time
The amount of time the profiler thread sleeps between samples in microseconds.
struct halide_profiler_pipeline_stats * pipelines
A linked list of stats gathered for each pipeline.
struct halide_mutex lock
Guards access to the fields below.
struct halide_profiler_instance_state * instances
The running instances of Halide pipelines.
int shutdown
Set to 1 when you want the profiler to wait for all running instances to finish and then stop gracefu...
halide_scalar_value_t is a simple union able to represent all the well-known scalar values in a filte...
union halide_scalar_value_t::@3 u
A struct representing a semaphore and a number of items that must be acquired from it.
struct halide_semaphore_t * semaphore
An opaque struct representing a semaphore.
void * value
If the event type is a load or a store, this points to the value being loaded or stored.
int32_t * coordinates
For loads and stores, an array which contains the location being accessed.
const char * func
The name of the Func or Pipeline that this event refers to.
const char * trace_tag
For halide_trace_tag, this points to a read-only null-terminated string of arbitrary text.
struct halide_type_t type
If the event type is a load or a store, this is the type of the data.
int32_t value_index
If this was a load or store of a Tuple-valued Func, this is which tuple element was accessed.
enum halide_trace_event_code_t event
The type of event.
int32_t dimensions
The length of the coordinates array.
The header of a packet in a binary trace.
uint32_t size
The total size of this packet in bytes.
int32_t id
The id of this packet (for the purpose of parent_id).
enum halide_trace_event_code_t event
struct halide_type_t type
The remaining fields are equivalent to those in halide_trace_event_t.
A runtime tag for a type in the halide type system.
uint8_t bits
The number of bits of precision of a single scalar value of this type.
uint16_t lanes
How many elements in a vector.
uint8_t code
The basic type code: signed integer, unsigned integer, or floating point.