Halide 19.0.0
Halide compiler and libraries
Loading...
Searching...
No Matches
HalideRuntime.h File Reference

This file declares the routines used by Halide internally in its runtime. More...

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>

Go to the source code of this file.

Classes

struct  halide_mutex
 Cross-platform mutex. More...
 
struct  halide_cond
 Cross-platform condition variable. More...
 
struct  halide_semaphore_t
 An opaque struct representing a semaphore. More...
 
struct  halide_semaphore_acquire_t
 A struct representing a semaphore and a number of items that must be acquired from it. More...
 
struct  halide_parallel_task_t
 A parallel task to be passed to halide_do_parallel_tasks. More...
 
struct  halide_type_t
 A runtime tag for a type in the halide type system. More...
 
struct  halide_trace_event_t
 
struct  halide_trace_packet_t
 The header of a packet in a binary trace. More...
 
struct  halide_device_interface_t
 Each GPU API provides a halide_device_interface_t struct pointing to the code that manages device allocations. More...
 
struct  halide_dimension_t
 
struct  halide_buffer_t
 The raw representation of an image passed around by generated Halide code. More...
 
struct  halide_scalar_value_t
 halide_scalar_value_t is a simple union able to represent all the well-known scalar values in a filter argument. More...
 
struct  halide_filter_argument_t_v0
 Obsolete version of halide_filter_argument_t; only present in code that wrote halide_filter_metadata_t version 0. More...
 
struct  halide_filter_argument_t
 halide_filter_argument_t is essentially a plain-C-struct equivalent to Halide::Argument; most user code will never need to create one. More...
 
struct  halide_filter_metadata_t
 
struct  halide_profiler_state
 The global state of the profiler. More...
 
struct  halide_device_allocation_pool
 

Macros

#define HALIDE_VERSION_MAJOR   19
 
#define HALIDE_VERSION_MINOR   0
 
#define HALIDE_VERSION_PATCH   0
 
#define HALIDE_ALWAYS_INLINE   inline __attribute__((always_inline))
 
#define HALIDE_NEVER_INLINE   __attribute__((noinline))
 
#define HALIDE_MUST_USE_RESULT
 
#define HALIDE_FUNCTION_ATTRS
 
#define HALIDE_EXPORT_SYMBOL   __attribute__((visibility("default")))
 
#define HALIDE_ATTRIBUTE_ALIGN(x)
 
#define HALIDE_ATTRIBUTE_DEPRECATED(x)
 

Typedefs

typedef void(* halide_print_t) (void *, const char *)
 
typedef void(* halide_error_handler_t) (void *, const char *)
 
typedef int(* halide_task_t) (void *user_context, int task_number, uint8_t *closure)
 Define halide_do_par_for to replace the default thread pool implementation.
 
typedef int(* halide_do_par_for_t) (void *, halide_task_t, int, int, uint8_t *)
 Set a custom method for performing a parallel for loop.
 
typedef int(* halide_semaphore_init_t) (struct halide_semaphore_t *, int)
 
typedef int(* halide_semaphore_release_t) (struct halide_semaphore_t *, int)
 
typedef bool(* halide_semaphore_try_acquire_t) (struct halide_semaphore_t *, int)
 
typedef int(* halide_loop_task_t) (void *user_context, int min, int extent, uint8_t *closure, void *task_parent)
 A task representing a serial for loop evaluated over some range.
 
typedef int(* halide_do_task_t) (void *, halide_task_t, int, uint8_t *)
 If you use the default do_par_for, you can still set a custom handler to perform each individual task.
 
typedef int(* halide_do_loop_task_t) (void *, halide_loop_task_t, int, int, uint8_t *, void *)
 The version of do_task called for loop tasks.
 
typedef int(* halide_do_parallel_tasks_t) (void *, int, struct halide_parallel_task_t *, void *task_parent)
 Provide an entire custom tasking runtime via function pointers.
 
typedef void *(* halide_malloc_t) (void *, size_t)
 
typedef void(* halide_free_t) (void *, void *)
 
typedef void *(* halide_get_symbol_t) (const char *name)
 
typedef void *(* halide_load_library_t) (const char *name)
 
typedef void *(* halide_get_library_symbol_t) (void *lib, const char *name)
 
typedef enum halide_type_code_t halide_type_code_t
 Types in the halide type system.
 
typedef int32_t(* halide_trace_t) (void *user_context, const struct halide_trace_event_t *)
 
typedef enum halide_target_feature_t halide_target_feature_t
 Optional features a compilation Target can have.
 
typedef int(* halide_can_use_target_features_t) (int count, const uint64_t *features)
 
typedef struct halide_dimension_t halide_dimension_t
 
typedef struct halide_buffer_t halide_buffer_t
 The raw representation of an image passed around by generated Halide code.
 

Enumerations

enum  halide_type_code_t {
  halide_type_int = 0 , halide_type_uint = 1 , halide_type_float = 2 , halide_type_handle = 3 ,
  halide_type_bfloat = 4
}
 Types in the halide type system. More...
 
enum  halide_trace_event_code_t {
  halide_trace_load = 0 , halide_trace_store = 1 , halide_trace_begin_realization = 2 , halide_trace_end_realization = 3 ,
  halide_trace_produce = 4 , halide_trace_end_produce = 5 , halide_trace_consume = 6 , halide_trace_end_consume = 7 ,
  halide_trace_begin_pipeline = 8 , halide_trace_end_pipeline = 9 , halide_trace_tag = 10
}
 
enum  halide_error_code_t {
  halide_error_code_success = 0 , halide_error_code_generic_error = -1 , halide_error_code_explicit_bounds_too_small = -2 , halide_error_code_bad_type = -3 ,
  halide_error_code_access_out_of_bounds = -4 , halide_error_code_buffer_allocation_too_large = -5 , halide_error_code_buffer_extents_too_large = -6 , halide_error_code_constraints_make_required_region_smaller = -7 ,
  halide_error_code_constraint_violated = -8 , halide_error_code_param_too_small = -9 , halide_error_code_param_too_large = -10 , halide_error_code_out_of_memory = -11 ,
  halide_error_code_buffer_argument_is_null = -12 , halide_error_code_debug_to_file_failed = -13 , halide_error_code_copy_to_host_failed = -14 , halide_error_code_copy_to_device_failed = -15 ,
  halide_error_code_device_malloc_failed = -16 , halide_error_code_device_sync_failed = -17 , halide_error_code_device_free_failed = -18 , halide_error_code_no_device_interface = -19 ,
  halide_error_code_unimplemented = -20 , halide_error_code_symbol_not_found = -21 , halide_error_code_internal_error = -22 , halide_error_code_device_run_failed = -23 ,
  halide_error_code_unaligned_host_ptr = -24 , halide_error_code_bad_fold = -25 , halide_error_code_fold_factor_too_small = -26 , halide_error_code_requirement_failed = -27 ,
  halide_error_code_buffer_extents_negative = -28 , halide_error_code_gpu_device_error = -29 , halide_error_code_trace_failed = -30 , halide_error_code_specialize_fail = -31 ,
  halide_error_code_device_wrap_native_failed = -32 , halide_error_code_device_detach_native_failed = -33 , halide_error_code_host_is_null = -34 , halide_error_code_bad_extern_fold = -35 ,
  halide_error_code_device_interface_no_device = -36 , halide_error_code_host_and_device_dirty = -37 , halide_error_code_buffer_is_null = -38 , halide_error_code_device_buffer_copy_failed = -39 ,
  halide_error_code_device_crop_unsupported = -40 , halide_error_code_device_crop_failed = -41 , halide_error_code_incompatible_device_interface = -42 , halide_error_code_bad_dimensions = -43 ,
  halide_error_code_device_dirty_with_no_device_support = -44 , halide_error_code_storage_bound_too_small = -45 , halide_error_code_split_factor_not_positive = -46 , halide_error_code_vscale_invalid = -47 ,
  halide_error_code_cannot_profile_pipeline = -48
}
 The error codes that may be returned by a Halide pipeline. More...
 
enum  halide_target_feature_t {
  halide_target_feature_jit = 0 , halide_target_feature_debug , halide_target_feature_no_asserts , halide_target_feature_no_bounds_query ,
  halide_target_feature_sse41 , halide_target_feature_avx , halide_target_feature_avx2 , halide_target_feature_fma ,
  halide_target_feature_fma4 , halide_target_feature_f16c , halide_target_feature_armv7s , halide_target_feature_no_neon ,
  halide_target_feature_vsx , halide_target_feature_power_arch_2_07 , halide_target_feature_cuda , halide_target_feature_cuda_capability30 ,
  halide_target_feature_cuda_capability32 , halide_target_feature_cuda_capability35 , halide_target_feature_cuda_capability50 , halide_target_feature_cuda_capability61 ,
  halide_target_feature_cuda_capability70 , halide_target_feature_cuda_capability75 , halide_target_feature_cuda_capability80 , halide_target_feature_cuda_capability86 ,
  halide_target_feature_opencl , halide_target_feature_cl_doubles , halide_target_feature_cl_atomic64 , halide_target_feature_user_context ,
  halide_target_feature_profile , halide_target_feature_no_runtime , halide_target_feature_metal , halide_target_feature_c_plus_plus_mangling ,
  halide_target_feature_large_buffers , halide_target_feature_hvx_128 , halide_target_feature_hvx_v62 , halide_target_feature_fuzz_float_stores ,
  halide_target_feature_soft_float_abi , halide_target_feature_msan , halide_target_feature_avx512 , halide_target_feature_avx512_knl ,
  halide_target_feature_avx512_skylake , halide_target_feature_avx512_cannonlake , halide_target_feature_avx512_zen4 , halide_target_feature_avx512_sapphirerapids ,
  halide_target_feature_trace_loads , halide_target_feature_trace_stores , halide_target_feature_trace_realizations , halide_target_feature_trace_pipeline ,
  halide_target_feature_hvx_v65 , halide_target_feature_hvx_v66 , halide_target_feature_hvx_v68 , halide_target_feature_cl_half ,
  halide_target_feature_strict_float , halide_target_feature_tsan , halide_target_feature_asan , halide_target_feature_d3d12compute ,
  halide_target_feature_check_unsafe_promises , halide_target_feature_hexagon_dma , halide_target_feature_embed_bitcode , halide_target_feature_enable_llvm_loop_opt ,
  halide_target_feature_wasm_mvponly , halide_target_feature_wasm_simd128 , halide_target_feature_wasm_threads , halide_target_feature_wasm_bulk_memory ,
  halide_target_feature_webgpu , halide_target_feature_sve , halide_target_feature_sve2 , halide_target_feature_egl ,
  halide_target_feature_arm_dot_prod , halide_target_feature_arm_fp16 , halide_llvm_large_code_model , halide_target_feature_rvv ,
  halide_target_feature_armv8a , halide_target_feature_armv81a , halide_target_feature_armv82a , halide_target_feature_armv83a ,
  halide_target_feature_armv84a , halide_target_feature_armv85a , halide_target_feature_armv86a , halide_target_feature_armv87a ,
  halide_target_feature_armv88a , halide_target_feature_armv89a , halide_target_feature_sanitizer_coverage , halide_target_feature_profile_by_timer ,
  halide_target_feature_spirv , halide_target_feature_vulkan , halide_target_feature_vulkan_int8 , halide_target_feature_vulkan_int16 ,
  halide_target_feature_vulkan_int64 , halide_target_feature_vulkan_float16 , halide_target_feature_vulkan_float64 , halide_target_feature_vulkan_version10 ,
  halide_target_feature_vulkan_version12 , halide_target_feature_vulkan_version13 , halide_target_feature_semihosting , halide_target_feature_avx10_1 ,
  halide_target_feature_x86_apx , halide_target_feature_end
}
 Optional features a compilation Target can have. More...
 
enum  halide_buffer_flags { halide_buffer_flag_host_dirty = 1 , halide_buffer_flag_device_dirty = 2 }
 
enum  halide_argument_kind_t { halide_argument_kind_input_scalar = 0 , halide_argument_kind_input_buffer = 1 , halide_argument_kind_output_buffer = 2 }
 

Functions

void halide_print (void *user_context, const char *)
 Print a message to stderr.
 
void halide_default_print (void *user_context, const char *)
 
halide_print_t halide_set_custom_print (halide_print_t print)
 
void halide_error (void *user_context, const char *)
 Halide calls this function on runtime errors (for example bounds checking failures).
 
void halide_default_error (void *user_context, const char *)
 
halide_error_handler_t halide_set_error_handler (halide_error_handler_t handler)
 
void halide_mutex_lock (struct halide_mutex *mutex)
 A basic set of mutex and condition variable functions, which call platform specific code for mutual exclusion.
 
void halide_mutex_unlock (struct halide_mutex *mutex)
 
void halide_cond_signal (struct halide_cond *cond)
 
void halide_cond_broadcast (struct halide_cond *cond)
 
void halide_cond_wait (struct halide_cond *cond, struct halide_mutex *mutex)
 
struct halide_mutex_array * halide_mutex_array_create (uint64_t sz)
 
void halide_mutex_array_destroy (void *user_context, void *array)
 
int halide_mutex_array_lock (struct halide_mutex_array *array, int entry)
 
int halide_mutex_array_unlock (struct halide_mutex_array *array, int entry)
 
int halide_do_par_for (void *user_context, halide_task_t task, int min, int size, uint8_t *closure)
 
void halide_shutdown_thread_pool (void)
 
halide_do_par_for_t halide_set_custom_do_par_for (halide_do_par_for_t do_par_for)
 
int halide_semaphore_init (struct halide_semaphore_t *, int n)
 
int halide_semaphore_release (struct halide_semaphore_t *, int n)
 
bool halide_semaphore_try_acquire (struct halide_semaphore_t *, int n)
 
int halide_do_parallel_tasks (void *user_context, int num_tasks, struct halide_parallel_task_t *tasks, void *task_parent)
 Enqueue some number of the tasks described above and wait for them to complete.
 
halide_do_task_t halide_set_custom_do_task (halide_do_task_t do_task)
 
int halide_do_task (void *user_context, halide_task_t f, int idx, uint8_t *closure)
 
halide_do_loop_task_t halide_set_custom_do_loop_task (halide_do_loop_task_t do_task)
 
int halide_do_loop_task (void *user_context, halide_loop_task_t f, int min, int extent, uint8_t *closure, void *task_parent)
 
void halide_set_custom_parallel_runtime (halide_do_par_for_t, halide_do_task_t, halide_do_loop_task_t, halide_do_parallel_tasks_t, halide_semaphore_init_t, halide_semaphore_try_acquire_t, halide_semaphore_release_t)
 
int halide_default_do_par_for (void *user_context, halide_task_t task, int min, int size, uint8_t *closure)
 The default versions of the parallel runtime functions.
 
int halide_default_do_parallel_tasks (void *user_context, int num_tasks, struct halide_parallel_task_t *tasks, void *task_parent)
 
int halide_default_do_task (void *user_context, halide_task_t f, int idx, uint8_t *closure)
 
int halide_default_do_loop_task (void *user_context, halide_loop_task_t f, int min, int extent, uint8_t *closure, void *task_parent)
 
int halide_default_semaphore_init (struct halide_semaphore_t *, int n)
 
int halide_default_semaphore_release (struct halide_semaphore_t *, int n)
 
bool halide_default_semaphore_try_acquire (struct halide_semaphore_t *, int n)
 
struct halide_thread * halide_spawn_thread (void(*f)(void *), void *closure)
 Spawn a thread.
 
void halide_join_thread (struct halide_thread *)
 Join a thread.
 
int halide_set_num_threads (int n)
 Set the number of threads used by Halide's thread pool.
 
void * halide_malloc (void *user_context, size_t x)
 Halide calls these functions to allocate and free memory.
 
void halide_free (void *user_context, void *ptr)
 
void * halide_default_malloc (void *user_context, size_t x)
 
void halide_default_free (void *user_context, void *ptr)
 
halide_malloc_t halide_set_custom_malloc (halide_malloc_t user_malloc)
 
halide_free_t halide_set_custom_free (halide_free_t user_free)
 
void * halide_get_symbol (const char *name)
 Halide calls these functions to interact with the underlying system runtime functions.
 
void * halide_load_library (const char *name)
 
void * halide_get_library_symbol (void *lib, const char *name)
 
void * halide_default_get_symbol (const char *name)
 
void * halide_default_load_library (const char *name)
 
void * halide_default_get_library_symbol (void *lib, const char *name)
 
halide_get_symbol_t halide_set_custom_get_symbol (halide_get_symbol_t user_get_symbol)
 
halide_load_library_t halide_set_custom_load_library (halide_load_library_t user_load_library)
 
halide_get_library_symbol_t halide_set_custom_get_library_symbol (halide_get_library_symbol_t user_get_library_symbol)
 
int32_t halide_debug_to_file (void *user_context, const char *filename, struct halide_buffer_t *buf)
 Called when debug_to_file is used inside Halide code.
 
int32_t halide_trace (void *user_context, const struct halide_trace_event_t *event)
 Called when Funcs are marked as trace_load, trace_store, or trace_realization.
 
int32_t halide_default_trace (void *user_context, const struct halide_trace_event_t *event)
 
halide_trace_t halide_set_custom_trace (halide_trace_t trace)
 
void halide_set_trace_file (int fd)
 Set the file descriptor that Halide should write binary trace events to.
 
int halide_get_trace_file (void *user_context)
 Halide calls this to retrieve the file descriptor to write binary trace events to.
 
int halide_shutdown_trace (void)
 If tracing is writing to a file, this call closes that file (flushing the trace).
 
void halide_device_release (void *user_context, const struct halide_device_interface_t *device_interface)
 Release all data associated with the given device interface, in particular all resources (memory, texture, context handles) allocated by Halide.
 
int halide_copy_to_host (void *user_context, struct halide_buffer_t *buf)
 Copy image data from device memory to host memory.
 
int halide_copy_to_device (void *user_context, struct halide_buffer_t *buf, const struct halide_device_interface_t *device_interface)
 Copy image data from host memory to device memory.
 
int halide_buffer_copy (void *user_context, struct halide_buffer_t *src, const struct halide_device_interface_t *dst_device_interface, struct halide_buffer_t *dst)
 Copy data from one buffer to another.
 
int halide_device_crop (void *user_context, const struct halide_buffer_t *src, struct halide_buffer_t *dst)
 Give the destination buffer a device allocation which is an alias for the same coordinate range in the source buffer.
 
int halide_device_slice (void *user_context, const struct halide_buffer_t *src, int slice_dim, int slice_pos, struct halide_buffer_t *dst)
 Give the destination buffer a device allocation which is an alias for a similar coordinate range in the source buffer, but with one dimension sliced away in the dst.
 
int halide_device_release_crop (void *user_context, struct halide_buffer_t *buf)
 Release any resources associated with a cropped/sliced view of another buffer.
 
int halide_device_sync (void *user_context, struct halide_buffer_t *buf)
 Wait for current GPU operations to complete.
 
int halide_device_sync_global (void *user_context, const struct halide_device_interface_t *device_interface)
 Wait for current GPU operations to complete.
 
int halide_device_malloc (void *user_context, struct halide_buffer_t *buf, const struct halide_device_interface_t *device_interface)
 Allocate device memory to back a halide_buffer_t.
 
int halide_device_free (void *user_context, struct halide_buffer_t *buf)
 Free device memory.
 
int halide_device_wrap_native (void *user_context, struct halide_buffer_t *buf, uint64_t handle, const struct halide_device_interface_t *device_interface)
 Wrap or detach a native device handle, setting the device field and device_interface field as appropriate for the given GPU API.
 
int halide_device_detach_native (void *user_context, struct halide_buffer_t *buf)
 
void halide_set_gpu_device (int n)
 Selects which gpu device to use.
 
int halide_get_gpu_device (void *user_context)
 Halide calls this to get the desired halide gpu device setting.
 
void halide_memoization_cache_set_size (int64_t size)
 Set the soft maximum amount of memory, in bytes, that the LRU cache will use to memoize Func results.
 
int halide_memoization_cache_lookup (void *user_context, const uint8_t *cache_key, int32_t size, struct halide_buffer_t *realized_bounds, int32_t tuple_count, struct halide_buffer_t **tuple_buffers)
 Given a cache key for a memoized result, currently constructed from the Func name and top-level Func name plus the arguments of the computation, determine if the result is in the cache and return it if so.
 
int halide_memoization_cache_store (void *user_context, const uint8_t *cache_key, int32_t size, struct halide_buffer_t *realized_bounds, int32_t tuple_count, struct halide_buffer_t **tuple_buffers, bool has_eviction_key, uint64_t eviction_key)
 Given a cache key for a memoized result, currently constructed from the Func name and top-level Func name plus the arguments of the computation, store the result in the cache for future access by halide_memoization_cache_lookup.
 
void halide_memoization_cache_evict (void *user_context, uint64_t eviction_key)
 Evict all cache entries that were tagged with the given eviction_key in the memoize scheduling directive.
 
void halide_memoization_cache_release (void *user_context, void *host)
 If halide_memoization_cache_lookup succeeds, halide_memoization_cache_release must be called to signal the storage is no longer being used by the caller.
 
void halide_memoization_cache_cleanup (void)
 Free all memory and resources associated with the memoization cache.
 
int halide_msan_check_memory_is_initialized (void *user_context, const void *ptr, uint64_t len, const char *name)
 Verify that a given range of memory has been initialized; only used when Target::MSAN is enabled.
 
int halide_msan_check_buffer_is_initialized (void *user_context, struct halide_buffer_t *buffer, const char *buf_name)
 Verify that the data pointed to by the halide_buffer_t is initialized (but not the halide_buffer_t itself), using halide_msan_check_memory_is_initialized() for checking.
 
int halide_msan_annotate_memory_is_initialized (void *user_context, const void *ptr, uint64_t len)
 Annotate that a given range of memory has been initialized; only used when Target::MSAN is enabled.
 
int halide_msan_annotate_buffer_is_initialized (void *user_context, struct halide_buffer_t *buffer)
 Mark the data pointed to by the halide_buffer_t as initialized (but not the halide_buffer_t itself), using halide_msan_annotate_memory_is_initialized() for marking.
 
void halide_msan_annotate_buffer_is_initialized_as_destructor (void *user_context, void *buffer)
 
int halide_error_bounds_inference_call_failed (void *user_context, const char *extern_stage_name, int result)
 Halide calls the functions below on various error conditions.
 
int halide_error_extern_stage_failed (void *user_context, const char *extern_stage_name, int result)
 A call to an extern stage failed.
 
int halide_error_explicit_bounds_too_small (void *user_context, const char *func_name, const char *var_name, int min_bound, int max_bound, int min_required, int max_required)
 Various other error conditions.
 
int halide_error_bad_type (void *user_context, const char *func_name, uint32_t type_given, uint32_t correct_type)
 
int halide_error_bad_dimensions (void *user_context, const char *func_name, int32_t dimensions_given, int32_t correct_dimensions)
 
int halide_error_access_out_of_bounds (void *user_context, const char *func_name, int dimension, int min_touched, int max_touched, int min_valid, int max_valid)
 
int halide_error_buffer_allocation_too_large (void *user_context, const char *buffer_name, uint64_t allocation_size, uint64_t max_size)
 
int halide_error_buffer_extents_negative (void *user_context, const char *buffer_name, int dimension, int extent)
 
int halide_error_buffer_extents_too_large (void *user_context, const char *buffer_name, int64_t actual_size, int64_t max_size)
 
int halide_error_constraints_make_required_region_smaller (void *user_context, const char *buffer_name, int dimension, int constrained_min, int constrained_extent, int required_min, int required_extent)
 
int halide_error_constraint_violated (void *user_context, const char *var, int val, const char *constrained_var, int constrained_val)
 
int halide_error_param_too_small_i64 (void *user_context, const char *param_name, int64_t val, int64_t min_val)
 
int halide_error_param_too_small_u64 (void *user_context, const char *param_name, uint64_t val, uint64_t min_val)
 
int halide_error_param_too_small_f64 (void *user_context, const char *param_name, double val, double min_val)
 
int halide_error_param_too_large_i64 (void *user_context, const char *param_name, int64_t val, int64_t max_val)
 
int halide_error_param_too_large_u64 (void *user_context, const char *param_name, uint64_t val, uint64_t max_val)
 
int halide_error_param_too_large_f64 (void *user_context, const char *param_name, double val, double max_val)
 
int halide_error_out_of_memory (void *user_context)
 
int halide_error_buffer_argument_is_null (void *user_context, const char *buffer_name)
 
int halide_error_debug_to_file_failed (void *user_context, const char *func, const char *filename, int error_code)
 
int halide_error_unaligned_host_ptr (void *user_context, const char *func_name, int alignment)
 
int halide_error_host_is_null (void *user_context, const char *func_name)
 
int halide_error_bad_fold (void *user_context, const char *func_name, const char *var_name, const char *loop_name)
 
int halide_error_bad_extern_fold (void *user_context, const char *func_name, int dim, int min, int extent, int valid_min, int fold_factor)
 
int halide_error_fold_factor_too_small (void *user_context, const char *func_name, const char *var_name, int fold_factor, const char *loop_name, int required_extent)
 
int halide_error_requirement_failed (void *user_context, const char *condition, const char *message)
 
int halide_error_specialize_fail (void *user_context, const char *message)
 
int halide_error_no_device_interface (void *user_context)
 
int halide_error_device_interface_no_device (void *user_context)
 
int halide_error_host_and_device_dirty (void *user_context)
 
int halide_error_buffer_is_null (void *user_context, const char *routine)
 
int halide_error_device_dirty_with_no_device_support (void *user_context, const char *buffer_name)
 
int halide_error_storage_bound_too_small (void *user_context, const char *func_name, const char *var_name, int provided_size, int required_size)
 
int halide_error_device_crop_failed (void *user_context)
 
int halide_error_split_factor_not_positive (void *user_context, const char *func_name, const char *orig, const char *outer, const char *inner, const char *factor_str, int factor)
 
int halide_error_vscale_invalid (void *user_context, const char *func_name, int runtime_vscale, int compiletime_vscale)
 
int halide_can_use_target_features (int count, const uint64_t *features)
 This function is called internally by Halide in some situations to determine if the current execution environment can support the given set of halide_target_feature_t flags.
 
halide_can_use_target_features_t halide_set_custom_can_use_target_features (halide_can_use_target_features_t)
 
int halide_default_can_use_target_features (int count, const uint64_t *features)
 This is the default implementation of halide_can_use_target_features; it is provided for convenience of user code that may wish to extend halide_can_use_target_features but continue providing existing support, e.g.
 
void halide_register_argv_and_metadata (int(*filter_argv_call)(void **), const struct halide_filter_metadata_t *filter_metadata, const char *const *extra_key_value_pairs)
 halide_register_argv_and_metadata() is a user-defined function that must be provided in order to use the registration.cc files produced by Generators when the 'registration' output is requested.
 
struct HALIDE_ATTRIBUTE_ALIGN (8) halide_profiler_func_stats
 The functions below here are relevant for pipelines compiled with the -profile target flag, which runs a sampling profiler thread alongside the pipeline.
 
struct halide_profiler_state * halide_profiler_get_state (void)
 Get a pointer to the global profiler state for programmatic inspection.
 
struct halide_profiler_pipeline_stats * halide_profiler_get_pipeline_state (const char *pipeline_name)
 Get a pointer to the pipeline state associated with pipeline_name.
 
int halide_profiler_sample (struct halide_profiler_state *s, uint64_t *prev_t)
 Collects profiling information.
 
void halide_profiler_reset (void)
 Reset profiler state cheaply.
 
void halide_profiler_shutdown (void)
 Reset all profiler state.
 
void halide_profiler_report (void *user_context)
 Print out timing statistics for everything run since the last reset.
 
void halide_profiler_lock (struct halide_profiler_state *)
 These routines are called to temporarily disable and then reenable the profiler.
 
void halide_profiler_unlock (struct halide_profiler_state *)
 
"Float16" functions

These functions operate on bits (uint16_t) representing a half precision floating point number (IEEE-754 2008 binary16).

float halide_float16_bits_to_float (uint16_t)
 Read bits representing a half precision floating point number and return the float that represents the same value.
 
double halide_float16_bits_to_double (uint16_t)
 Read bits representing a half precision floating point number and return the double that represents the same value.
 
int halide_reuse_device_allocations (void *user_context, bool)
 Tell Halide whether or not it is permitted to hold onto device allocations to service future requests instead of returning them eagerly to the underlying device API.
 
bool halide_can_reuse_device_allocations (void *user_context)
 Determines whether on device_free the memory is returned immediately to the device API, or placed on a free list for future use.
 
void halide_register_device_allocation_pool (struct halide_device_allocation_pool *)
 Register a callback to be informed when halide_reuse_device_allocations(false) is called, and all unused device allocations must be released.
 

Detailed Description

This file declares the routines used by Halide internally in its runtime.

On platforms that support weak linking, these can be replaced with user-defined versions by defining an extern "C" function with the same name and signature.

When doing Just In Time (JIT) compilation, members of some_pipeline_or_func.jit_handlers() must be replaced instead. The corresponding methods are documented below.

All of these functions take a "void *user_context" parameter as their first argument; if the Halide kernel that calls back to any of these functions has been compiled with the UserContext feature set on its Target, then the value of that pointer passed from the code that calls the Halide kernel is piped through to the function.

Some of these are also useful to call when using the default implementation. E.g. halide_shutdown_thread_pool.

Note that even on platforms with weak linking, some linker setups may not respect the override you provide. E.g. if the override is in a shared library and the halide object files are linked directly into the output, the builtin versions of the runtime functions will be called. See your linker documentation for more details. On Linux, LD_DYNAMIC_WEAK=1 may help.

Definition in file HalideRuntime.h.

Macro Definition Documentation

◆ HALIDE_VERSION_MAJOR

#define HALIDE_VERSION_MAJOR   19

Definition at line 26 of file HalideRuntime.h.

◆ HALIDE_VERSION_MINOR

#define HALIDE_VERSION_MINOR   0

Definition at line 27 of file HalideRuntime.h.

◆ HALIDE_VERSION_PATCH

#define HALIDE_VERSION_PATCH   0

Definition at line 28 of file HalideRuntime.h.

◆ HALIDE_ALWAYS_INLINE

#define HALIDE_ALWAYS_INLINE   inline __attribute__((always_inline))

Definition at line 49 of file HalideRuntime.h.

◆ HALIDE_NEVER_INLINE

#define HALIDE_NEVER_INLINE   __attribute__((noinline))

Definition at line 50 of file HalideRuntime.h.

◆ HALIDE_MUST_USE_RESULT

#define HALIDE_MUST_USE_RESULT

Definition at line 65 of file HalideRuntime.h.

◆ HALIDE_FUNCTION_ATTRS

#define HALIDE_FUNCTION_ATTRS

Definition at line 76 of file HalideRuntime.h.

◆ HALIDE_EXPORT_SYMBOL

#define HALIDE_EXPORT_SYMBOL   __attribute__((visibility("default")))

Definition at line 83 of file HalideRuntime.h.

◆ HALIDE_ATTRIBUTE_ALIGN

#define HALIDE_ATTRIBUTE_ALIGN(x)
Value:
__attribute__((aligned(x)))

Definition at line 491 of file HalideRuntime.h.

◆ HALIDE_ATTRIBUTE_DEPRECATED

#define HALIDE_ATTRIBUTE_DEPRECATED ( x)
Value:
__attribute__((deprecated(x)))

Definition at line 1705 of file HalideRuntime.h.

Typedef Documentation

◆ halide_print_t

typedef void(* halide_print_t) (void *, const char *)

Definition at line 170 of file HalideRuntime.h.

◆ halide_error_handler_t

typedef void(* halide_error_handler_t) (void *, const char *)

Definition at line 184 of file HalideRuntime.h.

◆ halide_task_t

typedef int(* halide_task_t) (void *user_context, int task_number, uint8_t *closure)

Define halide_do_par_for to replace the default thread pool implementation.

halide_shutdown_thread_pool can also be called to release resources used by the default thread pool on platforms where it makes sense. See Func::set_custom_do_task and Func::set_custom_do_par_for. Should return zero if all the jobs return zero, or an arbitrarily chosen return value from one of the jobs otherwise.

Definition at line 229 of file HalideRuntime.h.

◆ halide_do_par_for_t

typedef int(* halide_do_par_for_t) (void *, halide_task_t, int, int, uint8_t *)

Set a custom method for performing a parallel for loop.

Returns the old do_par_for handler.

Definition at line 238 of file HalideRuntime.h.

◆ halide_semaphore_init_t

typedef int(* halide_semaphore_init_t) (struct halide_semaphore_t *, int)

Definition at line 255 of file HalideRuntime.h.

◆ halide_semaphore_release_t

typedef int(* halide_semaphore_release_t) (struct halide_semaphore_t *, int)

Definition at line 256 of file HalideRuntime.h.

◆ halide_semaphore_try_acquire_t

typedef bool(* halide_semaphore_try_acquire_t) (struct halide_semaphore_t *, int)

Definition at line 257 of file HalideRuntime.h.

◆ halide_loop_task_t

typedef int(* halide_loop_task_t) (void *user_context, int min, int extent, uint8_t *closure, void *task_parent)

A task representing a serial for loop evaluated over some range.

Note that task_parent is a pass-through argument that should be passed to any dependent tasks that are invoked using halide_do_parallel_tasks underneath this call.

Definition at line 263 of file HalideRuntime.h.

◆ halide_do_task_t

typedef int(* halide_do_task_t) (void *, halide_task_t, int, uint8_t *)

If you use the default do_par_for, you can still set a custom handler to perform each individual task.

Returns the old handler.

Definition at line 330 of file HalideRuntime.h.

◆ halide_do_loop_task_t

typedef int(* halide_do_loop_task_t) (void *, halide_loop_task_t, int, int, uint8_t *, void *)

The version of do_task called for loop tasks.

By default calls the loop task with the same arguments.

Definition at line 339 of file HalideRuntime.h.

◆ halide_do_parallel_tasks_t

typedef int(* halide_do_parallel_tasks_t) (void *, int, struct halide_parallel_task_t *, void *task_parent)

Provide an entire custom tasking runtime via function pointers.

Note that do_task and semaphore_try_acquire are only ever called by halide_default_do_par_for and halide_default_do_parallel_tasks, so it's only necessary to provide those if you are mixing in the default implementations of do_par_for and do_parallel_tasks.

Definition at line 352 of file HalideRuntime.h.

◆ halide_malloc_t

typedef void *(* halide_malloc_t) (void *, size_t)

Definition at line 429 of file HalideRuntime.h.

◆ halide_free_t

typedef void(* halide_free_t) (void *, void *)

Definition at line 430 of file HalideRuntime.h.

◆ halide_get_symbol_t

typedef void *(* halide_get_symbol_t) (const char *name)

Definition at line 452 of file HalideRuntime.h.

◆ halide_load_library_t

typedef void *(* halide_load_library_t) (const char *name)

Definition at line 453 of file HalideRuntime.h.

◆ halide_get_library_symbol_t

typedef void *(* halide_get_library_symbol_t) (void *lib, const char *name)

Definition at line 454 of file HalideRuntime.h.

◆ halide_type_code_t

Types in the halide type system.

They can be ints, unsigned ints, or floats (of various bit-widths), or a handle (which is always 64 bits). Note that the int/uint/float values do not imply a specific bit width (the bit width is expected to be encoded in a separate value).

◆ halide_trace_t

typedef int32_t(* halide_trace_t) (void *user_context, const struct halide_trace_event_t *)

Definition at line 671 of file HalideRuntime.h.

◆ halide_target_feature_t

Optional features a compilation Target can have.

Be sure to keep this in sync with the Feature enum in Target.h and the implementation of get_runtime_compatible_target in Target.cpp if you add a new feature.

◆ halide_can_use_target_features_t

typedef int(* halide_can_use_target_features_t) (int count, const uint64_t *features)

Definition at line 1484 of file HalideRuntime.h.

◆ halide_dimension_t

typedef struct halide_dimension_t halide_dimension_t

◆ halide_buffer_t

typedef struct halide_buffer_t halide_buffer_t

The raw representation of an image passed around by generated Halide code.

It includes some stuff to track whether the image is not actually in main memory, but instead on a device (like a GPU). For a more convenient C++ wrapper, use Halide::Buffer<T>.

Enumeration Type Documentation

◆ halide_type_code_t

Types in the halide type system.

They can be ints, unsigned ints, or floats (of various bit-widths), or a handle (which is always 64 bits). Note that the int/uint/float values do not imply a specific bit width (the bit width is expected to be encoded in a separate value).

Enumerator
halide_type_int 

signed integers

halide_type_uint 

unsigned integers

halide_type_float 

IEEE floating point numbers.

halide_type_handle 

opaque pointer type (void *)

halide_type_bfloat 

floating point numbers in the bfloat format

Definition at line 473 of file HalideRuntime.h.

◆ halide_trace_event_code_t

Enumerator
halide_trace_load 
halide_trace_store 
halide_trace_begin_realization 
halide_trace_end_realization 
halide_trace_produce 
halide_trace_end_produce 
halide_trace_consume 
halide_trace_end_consume 
halide_trace_begin_pipeline 
halide_trace_end_pipeline 
halide_trace_tag 

Definition at line 574 of file HalideRuntime.h.

◆ halide_error_code_t

The error codes that may be returned by a Halide pipeline.

Enumerator
halide_error_code_success 

There was no error.

This is the value returned by Halide on success.

halide_error_code_generic_error 

An uncategorized error occurred.

Refer to the string passed to halide_error.

halide_error_code_explicit_bounds_too_small 

A Func was given an explicit bound via Func::bound, but this was not large enough to encompass the region that is used of the Func by the rest of the pipeline.

halide_error_code_bad_type 

The elem_size field of a halide_buffer_t does not match the size in bytes of the type of that ImageParam.

Probable type mismatch.

halide_error_code_access_out_of_bounds 

A pipeline would access memory outside of the halide_buffer_t passed in.

halide_error_code_buffer_allocation_too_large 

A halide_buffer_t was given that spans more than 2GB of memory.

halide_error_code_buffer_extents_too_large 

A halide_buffer_t was given with extents that multiply to a number greater than 2^31-1.

halide_error_code_constraints_make_required_region_smaller 

Applying explicit constraints on the size of an input or output buffer shrank the size of that buffer below what will be accessed by the pipeline.

halide_error_code_constraint_violated 

A constraint on a size or stride of an input or output buffer was not met by the halide_buffer_t passed in.

halide_error_code_param_too_small 

A scalar parameter passed in was smaller than its minimum declared value.

halide_error_code_param_too_large 

A scalar parameter passed in was greater than its maximum declared value.

halide_error_code_out_of_memory 

A call to halide_malloc returned NULL.

halide_error_code_buffer_argument_is_null 

A halide_buffer_t pointer passed in was NULL.

halide_error_code_debug_to_file_failed 

debug_to_file failed to open or write to the specified file.

halide_error_code_copy_to_host_failed 

The Halide runtime encountered an error while trying to copy from device to host.

Turn on -debug in your target string to see more details.

halide_error_code_copy_to_device_failed 

The Halide runtime encountered an error while trying to copy from host to device.

Turn on -debug in your target string to see more details.

halide_error_code_device_malloc_failed 

The Halide runtime encountered an error while trying to allocate memory on device.

Turn on -debug in your target string to see more details.

halide_error_code_device_sync_failed 

The Halide runtime encountered an error while trying to synchronize with a device.

Turn on -debug in your target string to see more details.

halide_error_code_device_free_failed 

The Halide runtime encountered an error while trying to free a device allocation.

Turn on -debug in your target string to see more details.

halide_error_code_no_device_interface 

Buffer has a non-zero device but no device interface, which violates a Halide invariant.

halide_error_code_unimplemented 

This part of the Halide runtime is unimplemented on this platform.

halide_error_code_symbol_not_found 

A runtime symbol could not be loaded.

halide_error_code_internal_error 

There is a bug in the Halide compiler.

halide_error_code_device_run_failed 

The Halide runtime encountered an error while trying to launch a GPU kernel.

Turn on -debug in your target string to see more details.

halide_error_code_unaligned_host_ptr 

The Halide runtime encountered a host pointer that violated the alignment set for it by way of a call to set_host_alignment.

halide_error_code_bad_fold 

A fold_storage directive was used on a dimension that is not accessed in a monotonically increasing or decreasing fashion.

halide_error_code_fold_factor_too_small 

A fold_storage directive was used with a fold factor that was too small to store all the values of a producer needed by the consumer.

halide_error_code_requirement_failed 

User-specified require() expression was not satisfied.

halide_error_code_buffer_extents_negative 

At least one of the buffer's extents is negative.

halide_error_code_gpu_device_error 

Call(s) to a GPU backend API failed.

halide_error_code_trace_failed 

Failure recording trace packets for one of the halide_target_feature_trace features.

halide_error_code_specialize_fail 

A specialize_fail() schedule branch was selected at runtime.

halide_error_code_device_wrap_native_failed 

The Halide runtime encountered an error while trying to wrap a native device handle.

Turn on -debug in your target string to see more details.

halide_error_code_device_detach_native_failed 

The Halide runtime encountered an error while trying to detach a native device handle.

Turn on -debug in your target string to see more details.

halide_error_code_host_is_null 

The host field on an input or output was null, the device field was not zero, and the pipeline tries to use the buffer on the host.

You may be passing a GPU-only buffer to a pipeline which is scheduled to use it on the CPU.

halide_error_code_bad_extern_fold 

A folded buffer was passed to an extern stage, but the region touched wraps around the fold boundary.

halide_error_code_device_interface_no_device 

Buffer has a non-null device_interface but device is 0, which violates a Halide invariant.

halide_error_code_host_and_device_dirty 

Buffer has both host and device dirty bits set, which violates a Halide invariant.

halide_error_code_buffer_is_null 

The halide_buffer_t * passed to a halide runtime routine is nullptr and this is not allowed.

halide_error_code_device_buffer_copy_failed 

The Halide runtime encountered an error while trying to copy from one buffer to another.

Turn on -debug in your target string to see more details.

halide_error_code_device_crop_unsupported 

Attempted to make cropped/sliced alias of a buffer with a device field, but the device_interface does not support cropping.

halide_error_code_device_crop_failed 

Cropping/slicing a buffer failed for some other reason.

Turn on -debug in your target string.

halide_error_code_incompatible_device_interface 

An operation on a buffer required an allocation on a particular device interface, but a device allocation already existed on a different device interface.

Free the old one first.

halide_error_code_bad_dimensions 

The dimensions field of a halide_buffer_t does not match the dimensions of that ImageParam.

halide_error_code_device_dirty_with_no_device_support 

A buffer with the device_dirty flag set was passed to a pipeline compiled with no device backends enabled, so it doesn't know how to copy the data back from device memory to host memory.

Either call copy_to_host before calling the Halide pipeline, or enable the appropriate device backend.

halide_error_code_storage_bound_too_small 

An explicit storage bound provided is too small to store all the values produced by the function.

halide_error_code_split_factor_not_positive 

A factor used to split a loop was discovered to be zero or negative at runtime.

halide_error_code_vscale_invalid 

"vscale" value of Scalable Vector detected in runtime does not match the vscale value used in compilation.

halide_error_code_cannot_profile_pipeline 

Profiling failed for a pipeline invocation.

Definition at line 1070 of file HalideRuntime.h.

◆ halide_target_feature_t

Optional features a compilation Target can have.

Be sure to keep this in sync with the Feature enum in Target.h and the implementation of get_runtime_compatible_target in Target.cpp if you add a new feature.

Enumerator
halide_target_feature_jit 

Generate code that will run immediately inside the calling process.

halide_target_feature_debug 

Turn on debug info and output for runtime code.

halide_target_feature_no_asserts 

Disable all runtime checks, for slightly tighter code.

halide_target_feature_no_bounds_query 

Disable the bounds querying functionality.

halide_target_feature_sse41 

Use SSE 4.1 and earlier instructions. Only relevant on x86.

halide_target_feature_avx 

Use AVX 1 instructions. Only relevant on x86.

halide_target_feature_avx2 

Use AVX 2 instructions. Only relevant on x86.

halide_target_feature_fma 

Enable x86 FMA instruction.

halide_target_feature_fma4 

Enable x86 (AMD) FMA4 instruction set.

halide_target_feature_f16c 

Enable x86 16-bit float support.

halide_target_feature_armv7s 

Generate code for ARMv7s. Only relevant for 32-bit ARM.

halide_target_feature_no_neon 

Avoid using NEON instructions. Only relevant for 32-bit ARM.

halide_target_feature_vsx 

Use VSX instructions. Only relevant on POWERPC.

halide_target_feature_power_arch_2_07 

Use POWER ISA 2.07 new instructions. Only relevant on POWERPC.

halide_target_feature_cuda 

Enable the CUDA runtime. Defaults to compute capability 2.0 (Fermi)

halide_target_feature_cuda_capability30 

Enable CUDA compute capability 3.0 (Kepler)

halide_target_feature_cuda_capability32 

Enable CUDA compute capability 3.2 (Tegra K1)

halide_target_feature_cuda_capability35 

Enable CUDA compute capability 3.5 (Kepler)

halide_target_feature_cuda_capability50 

Enable CUDA compute capability 5.0 (Maxwell)

halide_target_feature_cuda_capability61 

Enable CUDA compute capability 6.1 (Pascal)

halide_target_feature_cuda_capability70 

Enable CUDA compute capability 7.0 (Volta)

halide_target_feature_cuda_capability75 

Enable CUDA compute capability 7.5 (Turing)

halide_target_feature_cuda_capability80 

Enable CUDA compute capability 8.0 (Ampere)

halide_target_feature_cuda_capability86 

Enable CUDA compute capability 8.6 (Ampere)

halide_target_feature_opencl 

Enable the OpenCL runtime.

halide_target_feature_cl_doubles 

Enable double support on OpenCL targets.

halide_target_feature_cl_atomic64 

Enable 64-bit atomic operations on OpenCL targets.

halide_target_feature_user_context 

Generated code takes a user_context pointer as first argument.

halide_target_feature_profile 

Launch a sampling profiler alongside the Halide pipeline that monitors and reports the runtime used by each Func.

halide_target_feature_no_runtime 

Do not include a copy of the Halide runtime in any generated object file or assembly.

halide_target_feature_metal 

Enable the (Apple) Metal runtime.

halide_target_feature_c_plus_plus_mangling 

Generate C++ mangled names for result function, et al.

halide_target_feature_large_buffers 

Enable 64-bit buffer indexing to support buffers > 2GB. Ignored if bits != 64.

halide_target_feature_hvx_128 

Enable HVX 128 byte mode.

halide_target_feature_hvx_v62 

Enable Hexagon v62 architecture.

halide_target_feature_fuzz_float_stores 

On every floating point store, set the last bit of the mantissa to zero. Pipelines for which the output is very different with this feature enabled may also produce very different output on different processors.

halide_target_feature_soft_float_abi 

Enable soft float ABI. This only enables the soft float ABI calling convention, which does not necessarily use soft floats.

halide_target_feature_msan 

Enable hooks for MSAN support.

halide_target_feature_avx512 

Enable the base AVX512 subset supported by all AVX512 architectures. The specific feature sets are AVX-512F and AVX512-CD. See https://en.wikipedia.org/wiki/AVX-512 for a description of each AVX subset.

halide_target_feature_avx512_knl 

Enable the AVX512 features supported by Knight's Landing chips, such as the Xeon Phi x200. This includes the base AVX512 set, and also AVX512-CD and AVX512-ER.

halide_target_feature_avx512_skylake 

Enable the AVX512 features supported by Skylake Xeon server processors. This adds AVX512-VL, AVX512-BW, and AVX512-DQ to the base set. The main difference from the base AVX512 set is better support for small integer ops. Note that this does not include the Knight's Landing features. Note also that these features are not available on Skylake desktop and mobile processors.

halide_target_feature_avx512_cannonlake 

Enable the AVX512 features expected to be supported by future Cannonlake processors. This includes all of the Skylake features, plus AVX512-IFMA and AVX512-VBMI.

halide_target_feature_avx512_zen4 

Enable the AVX512 features supported by Zen4 processors. This includes all of the Cannonlake features, plus AVX512-VNNI, AVX512-BF16, and more.

halide_target_feature_avx512_sapphirerapids 

Enable the AVX512 features supported by Sapphire Rapids processors. This includes all of the Zen4 features, plus AVX-VNNI and AMX instructions.

halide_target_feature_trace_loads 

Trace all loads done by the pipeline. Equivalent to calling Func::trace_loads on every non-inlined Func.

halide_target_feature_trace_stores 

Trace all stores done by the pipeline. Equivalent to calling Func::trace_stores on every non-inlined Func.

halide_target_feature_trace_realizations 

Trace all realizations done by the pipeline. Equivalent to calling Func::trace_realizations on every non-inlined Func.

halide_target_feature_trace_pipeline 

Trace the pipeline.

halide_target_feature_hvx_v65 

Enable Hexagon v65 architecture.

halide_target_feature_hvx_v66 

Enable Hexagon v66 architecture.

halide_target_feature_hvx_v68 

Enable Hexagon v68 architecture.

halide_target_feature_cl_half 

Enable half support on OpenCL targets.

halide_target_feature_strict_float 

Turn off all non-IEEE floating-point optimization. Currently applies only to LLVM targets.

halide_target_feature_tsan 

Enable hooks for TSAN support.

halide_target_feature_asan 

Enable hooks for ASAN support.

halide_target_feature_d3d12compute 

Enable Direct3D 12 Compute runtime.

halide_target_feature_check_unsafe_promises 

Insert assertions for promises.

halide_target_feature_hexagon_dma 

Enable Hexagon DMA buffers.

halide_target_feature_embed_bitcode 

Emulate clang -fembed-bitcode flag.

halide_target_feature_enable_llvm_loop_opt 

Enable loop vectorization + unrolling in LLVM. Overrides halide_target_feature_disable_llvm_loop_opt. (Ignored for non-LLVM targets.)

halide_target_feature_wasm_mvponly 

Disable all extensions to WebAssembly codegen (including +sign-ext and +nontrapping-fptoint, which are on by default).

halide_target_feature_wasm_simd128 

Enable +simd128 instructions for WebAssembly codegen.

halide_target_feature_wasm_threads 

Enable use of threads in WebAssembly codegen. Requires the use of a wasm runtime that provides pthread-compatible wrappers (typically, Emscripten with the -pthreads flag). Unsupported under WASI.

halide_target_feature_wasm_bulk_memory 

Enable +bulk-memory instructions for WebAssembly codegen.

halide_target_feature_webgpu 

Enable the WebGPU runtime.

halide_target_feature_sve 

Enable ARM Scalable Vector Extensions.

halide_target_feature_sve2 

Enable ARM Scalable Vector Extensions v2.

halide_target_feature_egl 

Force use of EGL support.

halide_target_feature_arm_dot_prod 

Enable ARMv8.2-a dotprod extension (i.e. udot and sdot instructions)

halide_target_feature_arm_fp16 

Enable ARMv8.2-a half-precision floating point data processing.

halide_llvm_large_code_model 

Use the LLVM large code model to compile.

halide_target_feature_rvv 

Enable RISCV "V" Vector Extension.

halide_target_feature_armv8a 

Enable ARMv8a instructions.

halide_target_feature_armv81a 

Enable ARMv8.1a instructions.

halide_target_feature_armv82a 

Enable ARMv8.2a instructions.

halide_target_feature_armv83a 

Enable ARMv8.3a instructions.

halide_target_feature_armv84a 

Enable ARMv8.4a instructions.

halide_target_feature_armv85a 

Enable ARMv8.5a instructions.

halide_target_feature_armv86a 

Enable ARMv8.6a instructions.

halide_target_feature_armv87a 

Enable ARMv8.7a instructions.

halide_target_feature_armv88a 

Enable ARMv8.8a instructions.

halide_target_feature_armv89a 

Enable ARMv8.9a instructions.

halide_target_feature_sanitizer_coverage 

Enable hooks for SanitizerCoverage support.

halide_target_feature_profile_by_timer 

Alternative to halide_target_feature_profile using a timer interrupt, for systems without threads or applications that need to avoid them.

halide_target_feature_spirv 

Enable SPIR-V code generation support.

halide_target_feature_vulkan 

Enable Vulkan runtime support.

halide_target_feature_vulkan_int8 

Enable Vulkan 8-bit integer support.

halide_target_feature_vulkan_int16 

Enable Vulkan 16-bit integer support.

halide_target_feature_vulkan_int64 

Enable Vulkan 64-bit integer support.

halide_target_feature_vulkan_float16 

Enable Vulkan 16-bit float support.

halide_target_feature_vulkan_float64 

Enable Vulkan 64-bit float support.

halide_target_feature_vulkan_version10 

Enable Vulkan v1.0 runtime target support.

halide_target_feature_vulkan_version12 

Enable Vulkan v1.2 runtime target support.

halide_target_feature_vulkan_version13 

Enable Vulkan v1.3 runtime target support.

halide_target_feature_semihosting 

Used together with Target::NoOS for the baremetal target built with semihosting library and run with semihosting mode where minimum I/O communication with a host PC is available.

halide_target_feature_avx10_1 

Intel AVX10 version 1 support. vector_bits is used to indicate width.

halide_target_feature_x86_apx 

Intel x86 APX support. Covers the initial set of features released as APX: egpr, push2pop2, ppx, ndd.

halide_target_feature_end 

A sentinel. Every target is considered to have this feature, and setting this feature does nothing.

Definition at line 1353 of file HalideRuntime.h.

◆ halide_buffer_flags

Enumerator
halide_buffer_flag_host_dirty 
halide_buffer_flag_device_dirty 

Definition at line 1538 of file HalideRuntime.h.

◆ halide_argument_kind_t

Enumerator
halide_argument_kind_input_scalar 
halide_argument_kind_input_buffer 
halide_argument_kind_output_buffer 

Definition at line 1740 of file HalideRuntime.h.

Function Documentation

◆ halide_print()

void halide_print ( void * user_context,
const char *  )
extern

Print a message to stderr.

Main use is to support tracing functionality, print, and print_when calls. Also called by the default halide_error. This function can be replaced in JITed code by using halide_custom_print, and in AOT code by providing an implementation of halide_print. See Func::set_custom_print.

◆ halide_default_print()

void halide_default_print ( void * user_context,
const char *  )
extern

◆ halide_set_custom_print()

halide_print_t halide_set_custom_print ( halide_print_t print)
extern

◆ halide_error()

void halide_error ( void * user_context,
const char *  )
extern

Halide calls this function on runtime errors (for example bounds checking failures).

This function can be replaced in JITed code by using Func::set_error_handler, or in AOT code by calling halide_set_error_handler. In AOT code on platforms that support weak linking (i.e. not Windows), you can also override it by simply defining your own halide_error.

Referenced by Halide::Runtime::Internal::PrinterBase::allocation_error(), Halide::Runtime::Internal::MemoryArena::create(), Halide::Runtime::Internal::StringStorage::create(), halide_set_num_threads(), Halide::Runtime::Internal::MemoryArena::reclaim(), Halide::Runtime::Internal::MemoryArena::reserve(), and Halide::Runtime::Internal::StringStorage::terminate().

◆ halide_default_error()

void halide_default_error ( void * user_context,
const char *  )
extern

◆ halide_set_error_handler()

halide_error_handler_t halide_set_error_handler ( halide_error_handler_t handler)
extern

◆ halide_mutex_lock()

◆ halide_mutex_unlock()

◆ halide_cond_signal()

void halide_cond_signal ( struct halide_cond * cond)
extern

◆ halide_cond_broadcast()

◆ halide_cond_wait()

void halide_cond_wait ( struct halide_cond * cond,
struct halide_mutex * mutex )
extern

◆ halide_mutex_array_create()

struct halide_mutex_array * halide_mutex_array_create ( uint64_t sz)
extern

◆ halide_mutex_array_destroy()

void halide_mutex_array_destroy ( void * user_context,
void * array )
extern

Definition at line 931 of file synchronization_common.h.

References halide_mutex_array::array, and halide_free().

◆ halide_mutex_array_lock()

int halide_mutex_array_lock ( struct halide_mutex_array * array,
int entry )
extern

◆ halide_mutex_array_unlock()

int halide_mutex_array_unlock ( struct halide_mutex_array * array,
int entry )
extern

◆ halide_do_par_for()

int halide_do_par_for ( void * user_context,
halide_task_t task,
int min,
int size,
uint8_t * closure )
extern

◆ halide_shutdown_thread_pool()

◆ halide_set_custom_do_par_for()

halide_do_par_for_t halide_set_custom_do_par_for ( halide_do_par_for_t do_par_for)
extern

◆ halide_semaphore_init()

int halide_semaphore_init ( struct halide_semaphore_t * sema,
int n )
extern

◆ halide_semaphore_release()

int halide_semaphore_release ( struct halide_semaphore_t * sema,
int n )
extern

◆ halide_semaphore_try_acquire()

bool halide_semaphore_try_acquire ( struct halide_semaphore_t * sema,
int n )
extern

◆ halide_do_parallel_tasks()

int halide_do_parallel_tasks ( void * user_context,
int num_tasks,
struct halide_parallel_task_t * tasks,
void * task_parent )
extern

Enqueue some number of the tasks described above and wait for them to complete.

While waiting, the calling thread assists with either the tasks enqueued, or other non-blocking tasks in the task system. Note that task_parent should be NULL for top-level calls and the pass-through argument if this call is being made from another task.

Definition at line 809 of file thread_pool_common.h.

References Halide::Runtime::Internal::custom_do_parallel_tasks.

◆ halide_set_custom_do_task()

halide_do_task_t halide_set_custom_do_task ( halide_do_task_t do_task)
extern

Definition at line 758 of file thread_pool_common.h.

References Halide::Runtime::Internal::custom_do_task.

◆ halide_do_task()

int halide_do_task ( void * user_context,
halide_task_t f,
int idx,
uint8_t * closure )
extern

◆ halide_set_custom_do_loop_task()

halide_do_loop_task_t halide_set_custom_do_loop_task ( halide_do_loop_task_t do_task)
extern

◆ halide_do_loop_task()

int halide_do_loop_task ( void * user_context,
halide_loop_task_t f,
int min,
int extent,
uint8_t * closure,
void * task_parent )
extern

◆ halide_set_custom_parallel_runtime()

◆ halide_default_do_par_for()

◆ halide_default_do_parallel_tasks()

◆ halide_default_do_task()

int halide_default_do_task ( void * user_context,
halide_task_t f,
int idx,
uint8_t * closure )
extern

Definition at line 596 of file thread_pool_common.h.

◆ halide_default_do_loop_task()

int halide_default_do_loop_task ( void * user_context,
halide_loop_task_t f,
int min,
int extent,
uint8_t * closure,
void * task_parent )
extern

Definition at line 601 of file thread_pool_common.h.

◆ halide_default_semaphore_init()

int halide_default_semaphore_init ( struct halide_semaphore_t * s,
int n )
extern

Definition at line 722 of file thread_pool_common.h.

References halide_semaphore_impl_t::value.

◆ halide_default_semaphore_release()

◆ halide_default_semaphore_try_acquire()

bool halide_default_semaphore_try_acquire ( struct halide_semaphore_t * s,
int n )
extern

◆ halide_spawn_thread()

struct halide_thread * halide_spawn_thread ( void(* )(void *),
void * closure )
extern

Spawn a thread.

Returns a handle to the thread for the purposes of joining it. The thread must be joined in order to clean up any resources associated with it.

Referenced by Halide::Runtime::Internal::enqueue_work_already_locked().

◆ halide_join_thread()

void halide_join_thread ( struct halide_thread * )
extern

Join a thread.

Referenced by halide_shutdown_thread_pool().

◆ halide_set_num_threads()

int halide_set_num_threads ( int n)
extern

Set the number of threads used by Halide's thread pool.

Returns the old number.

n < 0 : error condition.
n == 0 : use a reasonable system default (typically, number of cpus online).
n == 1 : use exactly one thread; this will always enforce serial execution.
n > 1 : use a pool of exactly n threads.

(Note that this is only guaranteed when using the default implementations of halide_do_par_for(); custom implementations may completely ignore values passed to halide_set_num_threads().)

Definition at line 679 of file thread_pool_common.h.

References Halide::Runtime::Internal::clamp_num_threads(), Halide::Runtime::Internal::default_desired_num_threads(), Halide::Runtime::Internal::work_queue_t::desired_threads_working, halide_error(), halide_mutex_lock(), halide_mutex_unlock(), Halide::Runtime::Internal::work_queue_t::mutex, and Halide::Runtime::Internal::work_queue.

◆ halide_malloc()

void * halide_malloc ( void * user_context,
size_t x )
extern

Halide calls these functions to allocate and free memory.

To replace in AOT code, use halide_set_custom_malloc and halide_set_custom_free, or (on platforms that support weak linking), simply define these functions yourself. In JIT-compiled code use Func::set_custom_allocator.

If you override them, and find yourself wanting to call the default implementation from within your override, use halide_default_malloc/free.

Note that halide_malloc must return a pointer aligned to the maximum meaningful alignment for the platform for the purpose of vector loads and stores, and with an allocated size that is (at least) an integral multiple of that same alignment. The default implementation uses 32-byte alignment on arm and 64-byte alignment on x86. Additionally, it must be safe to read at least 8 bytes before the start and beyond the end.

Referenced by halide_mutex_array_create().

◆ halide_free()

void halide_free ( void * user_context,
void * ptr )
extern

◆ halide_default_malloc()

void * halide_default_malloc ( void * user_context,
size_t x )
extern

◆ halide_default_free()

void halide_default_free ( void * user_context,
void * ptr )
extern

◆ halide_set_custom_malloc()

halide_malloc_t halide_set_custom_malloc ( halide_malloc_t user_malloc)
extern

◆ halide_set_custom_free()

halide_free_t halide_set_custom_free ( halide_free_t user_free)
extern

◆ halide_get_symbol()

void * halide_get_symbol ( const char * name)
extern

Halide calls these functions to interact with the underlying system runtime functions.

To replace in AOT code on platforms that support weak linking, define these functions yourself, or use the halide_set_custom_load_library() and halide_set_custom_get_library_symbol() functions. In JIT-compiled code, use JITSharedRuntime::set_default_handlers().

halide_load_library and halide_get_library_symbol are equivalent to dlopen and dlsym. halide_get_symbol(sym) is equivalent to dlsym(RTLD_DEFAULT, sym).

◆ halide_load_library()

void * halide_load_library ( const char * name)
extern

◆ halide_get_library_symbol()

void * halide_get_library_symbol ( void * lib,
const char * name )
extern

◆ halide_default_get_symbol()

void * halide_default_get_symbol ( const char * name)
extern

◆ halide_default_load_library()

void * halide_default_load_library ( const char * name)
extern

◆ halide_default_get_library_symbol()

void * halide_default_get_library_symbol ( void * lib,
const char * name )
extern

◆ halide_set_custom_get_symbol()

halide_get_symbol_t halide_set_custom_get_symbol ( halide_get_symbol_t user_get_symbol)
extern

◆ halide_set_custom_load_library()

halide_load_library_t halide_set_custom_load_library ( halide_load_library_t user_load_library)
extern

◆ halide_set_custom_get_library_symbol()

halide_get_library_symbol_t halide_set_custom_get_library_symbol ( halide_get_library_symbol_t user_get_library_symbol)
extern

◆ halide_debug_to_file()

int32_t halide_debug_to_file ( void * user_context,
const char * filename,
struct halide_buffer_t * buf )
extern

Called when debug_to_file is used inside Halide code.

See Func::debug_to_file for how this is called.

Cannot be replaced in JITted code at present.

◆ halide_trace()

int32_t halide_trace ( void * user_context,
const struct halide_trace_event_t * event )
extern

Called when Funcs are marked as trace_load, trace_store, or trace_realization.

See Func::set_custom_trace. The default implementation either prints events via halide_print, or if HL_TRACE_FILE is defined, dumps the trace to that file in a sequence of trace packets. The header for a trace packet is defined below. If the trace is going to be large, you may want to make the file a named pipe, and then read from that pipe into gzip.

halide_trace returns a unique ID which will be passed to future events that "belong" to the earlier event as the parent id. The ownership hierarchy looks like:

begin_pipeline
+--trace_tag (if any)
+--trace_tag (if any)
...
+--begin_realization
|  +--produce
|  |  +--load/store
|  |  +--end_produce
|  +--consume
|  |  +--load
|  |  +--end_consume
|  +--end_realization
+--end_pipeline

Threading means that ownership cannot be inferred from the ordering of events. There can be many active realizations of a given function, or many active productions for a single realization. Within a single production, the ordering of events is meaningful.

Note that all trace_tag events (if any) will occur just after the begin_pipeline event, but before any begin_realization events. All trace_tags for a given Func will be emitted in the order added.

◆ halide_default_trace()

int32_t halide_default_trace ( void * user_context,
const struct halide_trace_event_t * event )
extern

◆ halide_set_custom_trace()

halide_trace_t halide_set_custom_trace ( halide_trace_t trace)
extern

◆ halide_set_trace_file()

void halide_set_trace_file ( int fd)
extern

Set the file descriptor that Halide should write binary trace events to.

If called with 0 as the argument, Halide outputs trace information to stdout in a human-readable format. If never called, Halide checks for the existence of an environment variable called HL_TRACE_FILE and opens that file. If HL_TRACE_FILE is not defined, it outputs trace information to stdout in a human-readable format.

◆ halide_get_trace_file()

int halide_get_trace_file ( void * user_context)
extern

Halide calls this to retrieve the file descriptor to write binary trace events to.

The default implementation returns the value set by halide_set_trace_file. Implement it yourself if you wish to use a custom file descriptor per user_context. Return zero from your implementation to tell Halide to print human-readable trace information to stdout.

◆ halide_shutdown_trace()

int halide_shutdown_trace ( void )
extern

If tracing is writing to a file, this call closes that file (flushing the trace).

Returns zero on success.

◆ halide_device_release()

void halide_device_release ( void * user_context,
const struct halide_device_interface_t * device_interface )
extern

Release all data associated with the given device interface, in particular all resources (memory, texture, context handles) allocated by Halide.

Must be called explicitly when using AOT compilation. This is not thread-safe with respect to actively running Halide code. Ensure all pipelines are finished before calling this.

◆ halide_copy_to_host()

int halide_copy_to_host ( void * user_context,
struct halide_buffer_t * buf )
extern

Copy image data from device memory to host memory.

This must be called explicitly to copy back the results of a GPU-based filter.

◆ halide_copy_to_device()

int halide_copy_to_device ( void * user_context,
struct halide_buffer_t * buf,
const struct halide_device_interface_t * device_interface )
extern

Copy image data from host memory to device memory.

This should not be called directly; Halide handles copying to the device automatically. If device_interface is NULL and the buf has a non-zero dev field, the device associated with the dev handle will be used. Otherwise if the dev field is 0 and device_interface is NULL, an error is returned.

◆ halide_buffer_copy()

int halide_buffer_copy ( void * user_context,
struct halide_buffer_t * src,
const struct halide_device_interface_t * dst_device_interface,
struct halide_buffer_t * dst )
extern

Copy data from one buffer to another.

The buffers may have different shapes and sizes, but the destination buffer's shape must be contained within the source buffer's shape. That is, for each dimension, the min on the destination buffer must be greater than or equal to the min on the source buffer, and min+extent on the destination buffer must be less than or equal to min+extent on the source buffer. The source data is pulled from either device or host memory on the source, depending on the dirty flags. host is preferred if both are valid. The dst_device_interface parameter controls the destination memory space. NULL means host memory.

◆ halide_device_crop()

int halide_device_crop ( void * user_context,
const struct halide_buffer_t * src,
struct halide_buffer_t * dst )
extern

Give the destination buffer a device allocation which is an alias for the same coordinate range in the source buffer.

Modifies the device, device_interface, and the device_dirty flag only. Only supported by some device APIs (others will return halide_error_code_device_crop_unsupported). Call halide_device_release_crop instead of halide_device_free to clean up resources associated with the cropped view. Do not free the device allocation on the source buffer while the destination buffer still lives. Note that the two buffers do not share dirty flags, so care must be taken to update them together as needed. Note that src and dst are required to have the same number of dimensions.

Note also that (in theory) device interfaces which support cropping may still not support cropping a crop (instead, create a new crop of the parent buffer); in practice, no known implementation has this limitation, although it is possible that some future implementations may require it.

◆ halide_device_slice()

int halide_device_slice ( void * user_context,
const struct halide_buffer_t * src,
int slice_dim,
int slice_pos,
struct halide_buffer_t * dst )
extern

Give the destination buffer a device allocation which is an alias for a similar coordinate range in the source buffer, but with one dimension sliced away in the dst.

Modifies the device, device_interface, and the device_dirty flag only. Only supported by some device APIs (others will return halide_error_code_device_crop_unsupported). Call halide_device_release_crop instead of halide_device_free to clean up resources associated with the sliced view. Do not free the device allocation on the source buffer while the destination buffer still lives. Note that the two buffers do not share dirty flags, so care must be taken to update them together as needed. Note that the dst buffer must have exactly one fewer dimension than the src buffer, and that slice_dim and slice_pos must be valid within src.

◆ halide_device_release_crop()

int halide_device_release_crop ( void * user_context,
struct halide_buffer_t * buf )
extern

Release any resources associated with a cropped/sliced view of another buffer.

◆ halide_device_sync()

int halide_device_sync ( void * user_context,
struct halide_buffer_t * buf )
extern

Wait for current GPU operations to complete.

Calling this explicitly should rarely be necessary, except maybe for profiling.

◆ halide_device_sync_global()

int halide_device_sync_global ( void * user_context,
const struct halide_device_interface_t * device_interface )
extern

Wait for current GPU operations to complete.

Calling this explicitly should rarely be necessary, except maybe for profiling. This variant of synchronization is useful when synchronization is desired without specifying any buffer to synchronize on. Calling this with a null device_interface is always illegal.

◆ halide_device_malloc()

int halide_device_malloc ( void * user_context,
struct halide_buffer_t * buf,
const struct halide_device_interface_t * device_interface )
extern

Allocate device memory to back a halide_buffer_t.

◆ halide_device_free()

int halide_device_free ( void * user_context,
struct halide_buffer_t * buf )
extern

Free device memory.

◆ halide_device_wrap_native()

int halide_device_wrap_native ( void * user_context,
struct halide_buffer_t * buf,
uint64_t handle,
const struct halide_device_interface_t * device_interface )
extern

Wrap or detach a native device handle, setting the device field and device_interface field as appropriate for the given GPU API.

The meaning of the opaque handle is specific to the device interface, so if you know the device interface in use, call the more specific functions in the runtime headers for your specific device API instead (e.g. HalideRuntimeCuda.h).

◆ halide_device_detach_native()

int halide_device_detach_native ( void * user_context,
struct halide_buffer_t * buf )
extern

◆ halide_set_gpu_device()

void halide_set_gpu_device ( int n)
extern

Selects which gpu device to use.

0 is usually the display device. If never called, Halide uses the environment variable HL_GPU_DEVICE. If that variable is unset, Halide uses the last device. Set this to -1 to use the last device.

◆ halide_get_gpu_device()

int halide_get_gpu_device ( void * user_context)
extern

Halide calls this to get the desired halide gpu device setting.

Implement this yourself to use a different gpu device per user_context. The default implementation returns the value set by halide_set_gpu_device, or the environment variable HL_GPU_DEVICE.

Definition at line 53 of file HalidePyTorchCudaHelpers.h.

References Halide::PyTorch::UserContext::device_id.

◆ halide_memoization_cache_set_size()

void halide_memoization_cache_set_size ( int64_t size)
extern

Set the soft maximum amount of memory, in bytes, that the LRU cache will use to memoize Func results.

This is not a strict maximum in that concurrency and simultaneous use of memoized results larger than the cache size can both cause it to temporarily be larger than the size specified here.

◆ halide_memoization_cache_lookup()

int halide_memoization_cache_lookup ( void * user_context,
const uint8_t * cache_key,
int32_t size,
struct halide_buffer_t * realized_bounds,
int32_t tuple_count,
struct halide_buffer_t ** tuple_buffers )
extern

Given a cache key for a memoized result, currently constructed from the Func name and top-level Func name plus the arguments of the computation, determine if the result is in the cache and return it if so.

(The internals of the cache key should be considered opaque by this function.) If this routine returns true, it is a cache miss. Otherwise, it will return false and the buffers passed in will be filled, via copying, with memoized data. The last argument is a list of halide_buffer_t pointers which represents the outputs of the memoized Func. If the Func does not return a Tuple, there will only be one halide_buffer_t in the list. The tuple_count parameter determines the length of the list.

The return values are: -1: Signals an error. 0: Success and cache hit. 1: Success and cache miss.

◆ halide_memoization_cache_store()

int halide_memoization_cache_store ( void * user_context,
const uint8_t * cache_key,
int32_t size,
struct halide_buffer_t * realized_bounds,
int32_t tuple_count,
struct halide_buffer_t ** tuple_buffers,
bool has_eviction_key,
uint64_t eviction_key )
extern

Given a cache key for a memoized result, currently constructed from the Func name and top-level Func name plus the arguments of the computation, store the result in the cache for future access by halide_memoization_cache_lookup.

(The internals of the cache key should be considered opaque by this function.) Data is copied out from the inputs and inputs are unmodified. The last argument is a list of halide_buffer_t pointers which represents the outputs of the memoized Func. If the Func does not return a Tuple, there will only be one halide_buffer_t in the list. The tuple_count parameter determines the length of the list.

If there is a memory allocation failure, the store does not store the data into the cache.

If has_eviction_key is true, the entry is marked with eviction_key to allow removing the key with halide_memoization_cache_evict.

◆ halide_memoization_cache_evict()

void halide_memoization_cache_evict ( void * user_context,
uint64_t eviction_key )
extern

Evict all cache entries that were tagged with the given eviction_key in the memoize scheduling directive.

◆ halide_memoization_cache_release()

void halide_memoization_cache_release ( void * user_context,
void * host )
extern

If halide_memoization_cache_lookup succeeds, halide_memoization_cache_release must be called to signal the storage is no longer being used by the caller.

It will be passed the host pointer of one of the buffers returned by halide_memoization_cache_lookup. That is, halide_memoization_cache_release will be called multiple times for the case where halide_memoization_cache_lookup is handling multiple buffers. (This corresponds to memoizing a Tuple in Halide.) Note that the host pointer must be sufficient to get to all information the release operation needs. The default Halide cache implementation accomplishes this by storing extra data before the start of the user modifiable host storage.

This call is like free and does not have a failure return.

◆ halide_memoization_cache_cleanup()

void halide_memoization_cache_cleanup ( void )
extern

Free all memory and resources associated with the memoization cache.

Must be called at a time when no other threads are accessing the cache.

◆ halide_msan_check_memory_is_initialized()

int halide_msan_check_memory_is_initialized ( void * user_context,
const void * ptr,
uint64_t len,
const char * name )
extern

Verify that a given range of memory has been initialized; only used when Target::MSAN is enabled.

The default implementation simply calls the LLVM-provided __msan_check_mem_is_initialized() function.

The return value should always be zero.

◆ halide_msan_check_buffer_is_initialized()

int halide_msan_check_buffer_is_initialized ( void * user_context,
struct halide_buffer_t * buffer,
const char * buf_name )
extern

Verify that the data pointed to by the halide_buffer_t is initialized (but not the halide_buffer_t itself), using halide_msan_check_memory_is_initialized() for checking.

The default implementation takes pains to only check the active memory ranges (skipping padding), and sorting into ranges to always check the smallest number of ranges, in monotonically increasing memory order.

Most client code should never need to replace the default implementation.

The return value should always be zero.

◆ halide_msan_annotate_memory_is_initialized()

int halide_msan_annotate_memory_is_initialized ( void * user_context,
const void * ptr,
uint64_t len )
extern

Annotate that a given range of memory has been initialized; only used when Target::MSAN is enabled.

The default implementation simply calls the LLVM-provided __msan_unpoison() function.

The return value should always be zero.

Referenced by Halide::Runtime::Internal::PrinterBase::str().

◆ halide_msan_annotate_buffer_is_initialized()

int halide_msan_annotate_buffer_is_initialized ( void * user_context,
struct halide_buffer_t * buffer )
extern

Mark the data pointed to by the halide_buffer_t as initialized (but not the halide_buffer_t itself), using halide_msan_annotate_memory_is_initialized() for marking.

The default implementation takes pains to only mark the active memory ranges (skipping padding), and sorting into ranges to always mark the smallest number of ranges, in monotonically increasing memory order.

Most client code should never need to replace the default implementation.

The return value should always be zero.

◆ halide_msan_annotate_buffer_is_initialized_as_destructor()

void halide_msan_annotate_buffer_is_initialized_as_destructor ( void * user_context,
void * buffer )
extern

◆ halide_error_bounds_inference_call_failed()

int halide_error_bounds_inference_call_failed ( void * user_context,
const char * extern_stage_name,
int result )
extern

Halide calls the functions below on various error conditions.

The default implementations construct an error message, call halide_error, then return the matching error code above. On platforms that support weak linking, you can override these to catch the errors individually. A call into an extern stage for the purposes of bounds inference failed. Returns the error code given by the extern stage.

◆ halide_error_extern_stage_failed()

int halide_error_extern_stage_failed ( void * user_context,
const char * extern_stage_name,
int result )
extern

A call to an extern stage failed.

Returns the error code given by the extern stage.

◆ halide_error_explicit_bounds_too_small()

int halide_error_explicit_bounds_too_small ( void * user_context,
const char * func_name,
const char * var_name,
int min_bound,
int max_bound,
int min_required,
int max_required )
extern

Various other error conditions.

See the enum above for a description of each.

◆ halide_error_bad_type()

int halide_error_bad_type ( void * user_context,
const char * func_name,
uint32_t type_given,
uint32_t correct_type )
extern

◆ halide_error_bad_dimensions()

int halide_error_bad_dimensions ( void * user_context,
const char * func_name,
int32_t dimensions_given,
int32_t correct_dimensions )
extern

◆ halide_error_access_out_of_bounds()

int halide_error_access_out_of_bounds ( void * user_context,
const char * func_name,
int dimension,
int min_touched,
int max_touched,
int min_valid,
int max_valid )
extern

◆ halide_error_buffer_allocation_too_large()

int halide_error_buffer_allocation_too_large ( void * user_context,
const char * buffer_name,
uint64_t allocation_size,
uint64_t max_size )
extern

◆ halide_error_buffer_extents_negative()

int halide_error_buffer_extents_negative ( void * user_context,
const char * buffer_name,
int dimension,
int extent )
extern

◆ halide_error_buffer_extents_too_large()

int halide_error_buffer_extents_too_large ( void * user_context,
const char * buffer_name,
int64_t actual_size,
int64_t max_size )
extern

◆ halide_error_constraints_make_required_region_smaller()

int halide_error_constraints_make_required_region_smaller ( void * user_context,
const char * buffer_name,
int dimension,
int constrained_min,
int constrained_extent,
int required_min,
int required_extent )
extern

◆ halide_error_constraint_violated()

int halide_error_constraint_violated ( void * user_context,
const char * var,
int val,
const char * constrained_var,
int constrained_val )
extern

◆ halide_error_param_too_small_i64()

int halide_error_param_too_small_i64 ( void * user_context,
const char * param_name,
int64_t val,
int64_t min_val )
extern

◆ halide_error_param_too_small_u64()

int halide_error_param_too_small_u64 ( void * user_context,
const char * param_name,
uint64_t val,
uint64_t min_val )
extern

◆ halide_error_param_too_small_f64()

int halide_error_param_too_small_f64 ( void * user_context,
const char * param_name,
double val,
double min_val )
extern

◆ halide_error_param_too_large_i64()

int halide_error_param_too_large_i64 ( void * user_context,
const char * param_name,
int64_t val,
int64_t max_val )
extern

◆ halide_error_param_too_large_u64()

int halide_error_param_too_large_u64 ( void * user_context,
const char * param_name,
uint64_t val,
uint64_t max_val )
extern

◆ halide_error_param_too_large_f64()

int halide_error_param_too_large_f64 ( void * user_context,
const char * param_name,
double val,
double max_val )
extern

◆ halide_error_out_of_memory()

int halide_error_out_of_memory ( void * user_context)
extern

◆ halide_error_buffer_argument_is_null()

int halide_error_buffer_argument_is_null ( void * user_context,
const char * buffer_name )
extern

◆ halide_error_debug_to_file_failed()

int halide_error_debug_to_file_failed ( void * user_context,
const char * func,
const char * filename,
int error_code )
extern

◆ halide_error_unaligned_host_ptr()

int halide_error_unaligned_host_ptr ( void * user_context,
const char * func_name,
int alignment )
extern

◆ halide_error_host_is_null()

int halide_error_host_is_null ( void * user_context,
const char * func_name )
extern

◆ halide_error_bad_fold()

int halide_error_bad_fold ( void * user_context,
const char * func_name,
const char * var_name,
const char * loop_name )
extern

◆ halide_error_bad_extern_fold()

int halide_error_bad_extern_fold ( void * user_context,
const char * func_name,
int dim,
int min,
int extent,
int valid_min,
int fold_factor )
extern

◆ halide_error_fold_factor_too_small()

int halide_error_fold_factor_too_small ( void * user_context,
const char * func_name,
const char * var_name,
int fold_factor,
const char * loop_name,
int required_extent )
extern

◆ halide_error_requirement_failed()

int halide_error_requirement_failed ( void * user_context,
const char * condition,
const char * message )
extern

◆ halide_error_specialize_fail()

int halide_error_specialize_fail ( void * user_context,
const char * message )
extern

◆ halide_error_no_device_interface()

int halide_error_no_device_interface ( void * user_context)
extern

◆ halide_error_device_interface_no_device()

int halide_error_device_interface_no_device ( void * user_context)
extern

◆ halide_error_host_and_device_dirty()

int halide_error_host_and_device_dirty ( void * user_context)
extern

◆ halide_error_buffer_is_null()

int halide_error_buffer_is_null ( void * user_context,
const char * routine )
extern

◆ halide_error_device_dirty_with_no_device_support()

int halide_error_device_dirty_with_no_device_support ( void * user_context,
const char * buffer_name )
extern

◆ halide_error_storage_bound_too_small()

int halide_error_storage_bound_too_small ( void * user_context,
const char * func_name,
const char * var_name,
int provided_size,
int required_size )
extern

◆ halide_error_device_crop_failed()

int halide_error_device_crop_failed ( void * user_context)
extern

◆ halide_error_split_factor_not_positive()

int halide_error_split_factor_not_positive ( void * user_context,
const char * func_name,
const char * orig,
const char * outer,
const char * inner,
const char * factor_str,
int factor )
extern

◆ halide_error_vscale_invalid()

int halide_error_vscale_invalid ( void * user_context,
const char * func_name,
int runtime_vscale,
int compiletime_vscale )
extern

◆ halide_can_use_target_features()

int halide_can_use_target_features ( int count,
const uint64_t * features )
extern

This function is called internally by Halide in some situations to determine if the current execution environment can support the given set of halide_target_feature_t flags.

The implementation must do the following:

– If there are flags set in features that the function knows cannot be supported, return 0.
– Otherwise, return 1.
– Note that any flags set in features that the function doesn't know how to test should be ignored; this implies that a return value of 1 means "not known to be bad" rather than "known to be good".

In other words: a return value of 0 means "It is not safe to use code compiled with these features", while a return value of 1 means "It is not obviously unsafe to use code compiled with these features".

The default implementation simply calls halide_default_can_use_target_features.

Note that features points to an array of count uint64_t; this array must contain enough bits to represent all the currently known features. Any excess bits must be set to zero.

◆ halide_set_custom_can_use_target_features()

halide_can_use_target_features_t halide_set_custom_can_use_target_features ( halide_can_use_target_features_t )
extern

◆ halide_default_can_use_target_features()

int halide_default_can_use_target_features ( int count,
const uint64_t * features )
extern

This is the default implementation of halide_can_use_target_features; it is provided for convenience of user code that may wish to extend halide_can_use_target_features but continue providing existing support, e.g.

int halide_can_use_target_features(int count, const uint64_t *features) {
     if (features[halide_target_somefeature >> 6] & (1LL << (halide_target_somefeature & 63))) {
         if (!can_use_somefeature()) {
             return 0;
         }
     }
     return halide_default_can_use_target_features(count, features);
}

◆ halide_register_argv_and_metadata()

void halide_register_argv_and_metadata ( int(* filter_argv_call )(void **),
const struct halide_filter_metadata_t * filter_metadata,
const char *const * extra_key_value_pairs )

halide_register_argv_and_metadata() is a user-defined function that must be provided in order to use the registration.cc files produced by Generators when the 'registration' output is requested.

Each registration.cc file provides a static initializer that calls this function with the given filter's argv-call variant, its metadata, and (optionally) any additional textual data that the build system chooses to tack on for its own purposes. Note that this will be called at static-initializer time (i.e., before main() is called), and in an unpredictable order. Note that extra_key_value_pairs may be nullptr; if it's not null, it's expected to be a null-terminated list of strings, with an even number of entries.

◆ HALIDE_ATTRIBUTE_ALIGN()

struct HALIDE_ATTRIBUTE_ALIGN ( 8 )

The functions below here are relevant for pipelines compiled with the -profile target flag, which runs a sampling profiler thread alongside the pipeline.

Per-invocation-of-a-pipeline state.

Per-pipeline state tracked by the sampling profiler.

Per-Func state tracked by the sampling profiler.

These exist in a linked list.

Lives on the stack of the Halide code. Exists in a doubly-linked list so that it can be cleanly removed.

Definition at line 1825 of file HalideRuntime.h.

◆ halide_profiler_get_state()

struct halide_profiler_state * halide_profiler_get_state ( void )
extern

Get a pointer to the global profiler state for programmatic inspection.

Lock it before using to pause the profiler.

◆ halide_profiler_get_pipeline_state()

struct halide_profiler_pipeline_stats * halide_profiler_get_pipeline_state ( const char * pipeline_name)
extern

Get a pointer to the pipeline state associated with pipeline_name.

This function grabs the global profiler state's lock on entry.

◆ halide_profiler_sample()

int halide_profiler_sample ( struct halide_profiler_state * s,
uint64_t * prev_t )
extern

Collects profiling information.

Intended to be called from a timer interrupt handler if timer-based profiling is being used. State argument is acquired via halide_profiler_get_state. prev_t argument is the previous time and can be used to set a more accurate time interval if desired.

◆ halide_profiler_reset()

void halide_profiler_reset ( void )
extern

Reset profiler state cheaply.

May leave threads running or some memory allocated but all accumulated statistics are reset. Blocks until all running profiled Halide pipelines exit.

◆ halide_profiler_shutdown()

void halide_profiler_shutdown ( void )
extern

Reset all profiler state.

Blocks until all running profiled Halide pipelines exit.

◆ halide_profiler_report()

void halide_profiler_report ( void * user_context)
extern

Print out timing statistics for everything run since the last reset.

Also happens at process exit.

◆ halide_profiler_lock()

void halide_profiler_lock ( struct halide_profiler_state * )
extern

These routines are called to temporarily disable and then reenable the profiler.

◆ halide_profiler_unlock()

void halide_profiler_unlock ( struct halide_profiler_state * )
extern

◆ halide_float16_bits_to_float()

float halide_float16_bits_to_float ( uint16_t )
extern

Read bits representing a half precision floating point number and return the float that represents the same value.

◆ halide_float16_bits_to_double()

double halide_float16_bits_to_double ( uint16_t )
extern

Read bits representing a half precision floating point number and return the double that represents the same value.

Referenced by Halide::Runtime::Internal::PrinterBase::operator<<().

◆ halide_reuse_device_allocations()

int halide_reuse_device_allocations ( void * user_context,
bool  )
extern

Tell Halide whether or not it is permitted to hold onto device allocations to service future requests instead of returning them eagerly to the underlying device API.

Many device allocators are quite slow, so it can be beneficial to set this to true. The default value for now is false.

Note that if enabled, the eviction policy is very simplistic. The 32 most-recently used allocations are preserved, regardless of their size. Additionally, if a call to cuMalloc results in an out-of-memory error, the entire cache is flushed and the allocation is retried. See https://github.com/halide/Halide/issues/4093

If set to false, releases all unused device allocations back to the underlying device APIs. For finer-grained control, see specific methods in each device api runtime.

Note that if the flag is set to true, this call must succeed and return a value of halide_error_code_success (i.e., zero); if you replace the implementation of this call in the runtime, you must honor this contract.

◆ halide_can_reuse_device_allocations()

bool halide_can_reuse_device_allocations ( void * user_context)
extern

Determines whether on device_free the memory is returned immediately to the device API, or placed on a free list for future use.

Override and switch based on the user_context for finer-grained control. By default just returns the value most recently set by the method above.

◆ halide_register_device_allocation_pool()

void halide_register_device_allocation_pool ( struct halide_device_allocation_pool * )
extern

Register a callback to be informed when halide_reuse_device_allocations(false) is called, and all unused device allocations must be released.

The object passed should have global lifetime, and its next field will be clobbered.