Halide 19.0.0
Halide compiler and libraries
|
This file declares the routines used by Halide internally in its runtime. More...
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>
Go to the source code of this file.
Classes | |
struct | halide_mutex |
Cross-platform mutex. More... | |
struct | halide_cond |
Cross platform condition variable. More... | |
struct | halide_semaphore_t |
An opaque struct representing a semaphore. More... | |
struct | halide_semaphore_acquire_t |
A struct representing a semaphore and a number of items that must be acquired from it. More... | |
struct | halide_parallel_task_t |
A parallel task to be passed to halide_do_parallel_tasks. More... | |
struct | halide_type_t |
A runtime tag for a type in the halide type system. More... | |
struct | halide_trace_event_t |
struct | halide_trace_packet_t |
The header of a packet in a binary trace. More... | |
struct | halide_device_interface_t |
Each GPU API provides a halide_device_interface_t struct pointing to the code that manages device allocations. More... | |
struct | halide_dimension_t |
struct | halide_buffer_t |
The raw representation of an image passed around by generated Halide code. More... | |
struct | halide_scalar_value_t |
halide_scalar_value_t is a simple union able to represent all the well-known scalar values in a filter argument. More... | |
struct | halide_filter_argument_t_v0 |
Obsolete version of halide_filter_argument_t; only present in code that wrote halide_filter_metadata_t version 0. More... | |
struct | halide_filter_argument_t |
halide_filter_argument_t is essentially a plain-C-struct equivalent to Halide::Argument; most user code will never need to create one. More... | |
struct | halide_filter_metadata_t |
struct | halide_profiler_state |
The global state of the profiler. More... | |
struct | halide_device_allocation_pool |
Macros | |
#define | HALIDE_VERSION_MAJOR 19 |
#define | HALIDE_VERSION_MINOR 0 |
#define | HALIDE_VERSION_PATCH 0 |
#define | HALIDE_ALWAYS_INLINE inline __attribute__((always_inline)) |
#define | HALIDE_NEVER_INLINE __attribute__((noinline)) |
#define | HALIDE_MUST_USE_RESULT |
#define | HALIDE_FUNCTION_ATTRS |
#define | HALIDE_EXPORT_SYMBOL __attribute__((visibility("default"))) |
#define | HALIDE_ATTRIBUTE_ALIGN(x) |
#define | HALIDE_ATTRIBUTE_DEPRECATED(x) |
Typedefs | |
typedef void(* | halide_print_t) (void *, const char *) |
typedef void(* | halide_error_handler_t) (void *, const char *) |
typedef int(* | halide_task_t) (void *user_context, int task_number, uint8_t *closure) |
Define halide_do_par_for to replace the default thread pool implementation. | |
typedef int(* | halide_do_par_for_t) (void *, halide_task_t, int, int, uint8_t *) |
Set a custom method for performing a parallel for loop. | |
typedef int(* | halide_semaphore_init_t) (struct halide_semaphore_t *, int) |
typedef int(* | halide_semaphore_release_t) (struct halide_semaphore_t *, int) |
typedef bool(* | halide_semaphore_try_acquire_t) (struct halide_semaphore_t *, int) |
typedef int(* | halide_loop_task_t) (void *user_context, int min, int extent, uint8_t *closure, void *task_parent) |
A task representing a serial for loop evaluated over some range. | |
typedef int(* | halide_do_task_t) (void *, halide_task_t, int, uint8_t *) |
If you use the default do_par_for, you can still set a custom handler to perform each individual task. | |
typedef int(* | halide_do_loop_task_t) (void *, halide_loop_task_t, int, int, uint8_t *, void *) |
The version of do_task called for loop tasks. | |
typedef int(* | halide_do_parallel_tasks_t) (void *, int, struct halide_parallel_task_t *, void *task_parent) |
Provide an entire custom tasking runtime via function pointers. | |
typedef void *(* | halide_malloc_t) (void *, size_t) |
typedef void(* | halide_free_t) (void *, void *) |
typedef void *(* | halide_get_symbol_t) (const char *name) |
typedef void *(* | halide_load_library_t) (const char *name) |
typedef void *(* | halide_get_library_symbol_t) (void *lib, const char *name) |
typedef enum halide_type_code_t | halide_type_code_t |
Types in the halide type system. | |
typedef int32_t(* | halide_trace_t) (void *user_context, const struct halide_trace_event_t *) |
typedef enum halide_target_feature_t | halide_target_feature_t |
Optional features a compilation Target can have. | |
typedef int(* | halide_can_use_target_features_t) (int count, const uint64_t *features) |
typedef struct halide_dimension_t | halide_dimension_t |
typedef struct halide_buffer_t | halide_buffer_t |
The raw representation of an image passed around by generated Halide code. | |
Functions | |
void | halide_print (void *user_context, const char *) |
Print a message to stderr. | |
void | halide_default_print (void *user_context, const char *) |
halide_print_t | halide_set_custom_print (halide_print_t print) |
void | halide_error (void *user_context, const char *) |
Halide calls this function on runtime errors (for example bounds checking failures). | |
void | halide_default_error (void *user_context, const char *) |
halide_error_handler_t | halide_set_error_handler (halide_error_handler_t handler) |
void | halide_mutex_lock (struct halide_mutex *mutex) |
A basic set of mutex and condition variable functions, which call platform specific code for mutual exclusion. | |
void | halide_mutex_unlock (struct halide_mutex *mutex) |
void | halide_cond_signal (struct halide_cond *cond) |
void | halide_cond_broadcast (struct halide_cond *cond) |
void | halide_cond_wait (struct halide_cond *cond, struct halide_mutex *mutex) |
struct halide_mutex_array * | halide_mutex_array_create (uint64_t sz) |
void | halide_mutex_array_destroy (void *user_context, void *array) |
int | halide_mutex_array_lock (struct halide_mutex_array *array, int entry) |
int | halide_mutex_array_unlock (struct halide_mutex_array *array, int entry) |
int | halide_do_par_for (void *user_context, halide_task_t task, int min, int size, uint8_t *closure) |
void | halide_shutdown_thread_pool (void) |
halide_do_par_for_t | halide_set_custom_do_par_for (halide_do_par_for_t do_par_for) |
int | halide_semaphore_init (struct halide_semaphore_t *, int n) |
int | halide_semaphore_release (struct halide_semaphore_t *, int n) |
bool | halide_semaphore_try_acquire (struct halide_semaphore_t *, int n) |
int | halide_do_parallel_tasks (void *user_context, int num_tasks, struct halide_parallel_task_t *tasks, void *task_parent) |
Enqueue some number of the tasks described above and wait for them to complete. | |
halide_do_task_t | halide_set_custom_do_task (halide_do_task_t do_task) |
int | halide_do_task (void *user_context, halide_task_t f, int idx, uint8_t *closure) |
halide_do_loop_task_t | halide_set_custom_do_loop_task (halide_do_loop_task_t do_task) |
int | halide_do_loop_task (void *user_context, halide_loop_task_t f, int min, int extent, uint8_t *closure, void *task_parent) |
void | halide_set_custom_parallel_runtime (halide_do_par_for_t, halide_do_task_t, halide_do_loop_task_t, halide_do_parallel_tasks_t, halide_semaphore_init_t, halide_semaphore_try_acquire_t, halide_semaphore_release_t) |
int | halide_default_do_par_for (void *user_context, halide_task_t task, int min, int size, uint8_t *closure) |
The default versions of the parallel runtime functions. | |
int | halide_default_do_parallel_tasks (void *user_context, int num_tasks, struct halide_parallel_task_t *tasks, void *task_parent) |
int | halide_default_do_task (void *user_context, halide_task_t f, int idx, uint8_t *closure) |
int | halide_default_do_loop_task (void *user_context, halide_loop_task_t f, int min, int extent, uint8_t *closure, void *task_parent) |
int | halide_default_semaphore_init (struct halide_semaphore_t *, int n) |
int | halide_default_semaphore_release (struct halide_semaphore_t *, int n) |
bool | halide_default_semaphore_try_acquire (struct halide_semaphore_t *, int n) |
struct halide_thread * | halide_spawn_thread (void(*f)(void *), void *closure) |
Spawn a thread. | |
void | halide_join_thread (struct halide_thread *) |
Join a thread. | |
int | halide_set_num_threads (int n) |
Set the number of threads used by Halide's thread pool. | |
void * | halide_malloc (void *user_context, size_t x) |
Halide calls these functions to allocate and free memory. | |
void | halide_free (void *user_context, void *ptr) |
void * | halide_default_malloc (void *user_context, size_t x) |
void | halide_default_free (void *user_context, void *ptr) |
halide_malloc_t | halide_set_custom_malloc (halide_malloc_t user_malloc) |
halide_free_t | halide_set_custom_free (halide_free_t user_free) |
void * | halide_get_symbol (const char *name) |
Halide calls these functions to interact with the underlying system runtime functions. | |
void * | halide_load_library (const char *name) |
void * | halide_get_library_symbol (void *lib, const char *name) |
void * | halide_default_get_symbol (const char *name) |
void * | halide_default_load_library (const char *name) |
void * | halide_default_get_library_symbol (void *lib, const char *name) |
halide_get_symbol_t | halide_set_custom_get_symbol (halide_get_symbol_t user_get_symbol) |
halide_load_library_t | halide_set_custom_load_library (halide_load_library_t user_load_library) |
halide_get_library_symbol_t | halide_set_custom_get_library_symbol (halide_get_library_symbol_t user_get_library_symbol) |
int32_t | halide_debug_to_file (void *user_context, const char *filename, struct halide_buffer_t *buf) |
Called when debug_to_file is used inside Halide code. | |
int32_t | halide_trace (void *user_context, const struct halide_trace_event_t *event) |
Called when Funcs are marked as trace_load, trace_store, or trace_realization. | |
int32_t | halide_default_trace (void *user_context, const struct halide_trace_event_t *event) |
halide_trace_t | halide_set_custom_trace (halide_trace_t trace) |
void | halide_set_trace_file (int fd) |
Set the file descriptor that Halide should write binary trace events to. | |
int | halide_get_trace_file (void *user_context) |
Halide calls this to retrieve the file descriptor to write binary trace events to. | |
int | halide_shutdown_trace (void) |
If tracing is writing to a file, this call closes that file (flushing the trace). | |
void | halide_device_release (void *user_context, const struct halide_device_interface_t *device_interface) |
Release all data associated with the given device interface, in particular all resources (memory, texture, context handles) allocated by Halide. | |
int | halide_copy_to_host (void *user_context, struct halide_buffer_t *buf) |
Copy image data from device memory to host memory. | |
int | halide_copy_to_device (void *user_context, struct halide_buffer_t *buf, const struct halide_device_interface_t *device_interface) |
Copy image data from host memory to device memory. | |
int | halide_buffer_copy (void *user_context, struct halide_buffer_t *src, const struct halide_device_interface_t *dst_device_interface, struct halide_buffer_t *dst) |
Copy data from one buffer to another. | |
int | halide_device_crop (void *user_context, const struct halide_buffer_t *src, struct halide_buffer_t *dst) |
Give the destination buffer a device allocation which is an alias for the same coordinate range in the source buffer. | |
int | halide_device_slice (void *user_context, const struct halide_buffer_t *src, int slice_dim, int slice_pos, struct halide_buffer_t *dst) |
Give the destination buffer a device allocation which is an alias for a similar coordinate range in the source buffer, but with one dimension sliced away in the dst. | |
int | halide_device_release_crop (void *user_context, struct halide_buffer_t *buf) |
Release any resources associated with a cropped/sliced view of another buffer. | |
int | halide_device_sync (void *user_context, struct halide_buffer_t *buf) |
Wait for current GPU operations to complete. | |
int | halide_device_sync_global (void *user_context, const struct halide_device_interface_t *device_interface) |
Wait for current GPU operations to complete. | |
int | halide_device_malloc (void *user_context, struct halide_buffer_t *buf, const struct halide_device_interface_t *device_interface) |
Allocate device memory to back a halide_buffer_t. | |
int | halide_device_free (void *user_context, struct halide_buffer_t *buf) |
Free device memory. | |
int | halide_device_wrap_native (void *user_context, struct halide_buffer_t *buf, uint64_t handle, const struct halide_device_interface_t *device_interface) |
Wrap or detach a native device handle, setting the device field and device_interface field as appropriate for the given GPU API. | |
int | halide_device_detach_native (void *user_context, struct halide_buffer_t *buf) |
void | halide_set_gpu_device (int n) |
Selects which gpu device to use. | |
int | halide_get_gpu_device (void *user_context) |
Halide calls this to get the desired halide gpu device setting. | |
void | halide_memoization_cache_set_size (int64_t size) |
Set the soft maximum amount of memory, in bytes, that the LRU cache will use to memoize Func results. | |
int | halide_memoization_cache_lookup (void *user_context, const uint8_t *cache_key, int32_t size, struct halide_buffer_t *realized_bounds, int32_t tuple_count, struct halide_buffer_t **tuple_buffers) |
Given a cache key for a memoized result, currently constructed from the Func name and top-level Func name plus the arguments of the computation, determine if the result is in the cache and return it if so. | |
int | halide_memoization_cache_store (void *user_context, const uint8_t *cache_key, int32_t size, struct halide_buffer_t *realized_bounds, int32_t tuple_count, struct halide_buffer_t **tuple_buffers, bool has_eviction_key, uint64_t eviction_key) |
Given a cache key for a memoized result, currently constructed from the Func name and top-level Func name plus the arguments of the computation, store the result in the cache for future access by halide_memoization_cache_lookup. | |
void | halide_memoization_cache_evict (void *user_context, uint64_t eviction_key) |
Evict all cache entries that were tagged with the given eviction_key in the memoize scheduling directive. | |
void | halide_memoization_cache_release (void *user_context, void *host) |
If halide_memoization_cache_lookup succeeds, halide_memoization_cache_release must be called to signal the storage is no longer being used by the caller. | |
void | halide_memoization_cache_cleanup (void) |
Free all memory and resources associated with the memoization cache. | |
int | halide_msan_check_memory_is_initialized (void *user_context, const void *ptr, uint64_t len, const char *name) |
Verify that a given range of memory has been initialized; only used when Target::MSAN is enabled. | |
int | halide_msan_check_buffer_is_initialized (void *user_context, struct halide_buffer_t *buffer, const char *buf_name) |
Verify that the data pointed to by the halide_buffer_t is initialized (but not the halide_buffer_t itself), using halide_msan_check_memory_is_initialized() for checking. | |
int | halide_msan_annotate_memory_is_initialized (void *user_context, const void *ptr, uint64_t len) |
Annotate that a given range of memory has been initialized; only used when Target::MSAN is enabled. | |
int | halide_msan_annotate_buffer_is_initialized (void *user_context, struct halide_buffer_t *buffer) |
Mark the data pointed to by the halide_buffer_t as initialized (but not the halide_buffer_t itself), using halide_msan_annotate_memory_is_initialized() for marking. | |
void | halide_msan_annotate_buffer_is_initialized_as_destructor (void *user_context, void *buffer) |
int | halide_error_bounds_inference_call_failed (void *user_context, const char *extern_stage_name, int result) |
Halide calls the functions below on various error conditions. | |
int | halide_error_extern_stage_failed (void *user_context, const char *extern_stage_name, int result) |
A call to an extern stage failed. | |
int | halide_error_explicit_bounds_too_small (void *user_context, const char *func_name, const char *var_name, int min_bound, int max_bound, int min_required, int max_required) |
Various other error conditions. | |
int | halide_error_bad_type (void *user_context, const char *func_name, uint32_t type_given, uint32_t correct_type) |
int | halide_error_bad_dimensions (void *user_context, const char *func_name, int32_t dimensions_given, int32_t correct_dimensions) |
int | halide_error_access_out_of_bounds (void *user_context, const char *func_name, int dimension, int min_touched, int max_touched, int min_valid, int max_valid) |
int | halide_error_buffer_allocation_too_large (void *user_context, const char *buffer_name, uint64_t allocation_size, uint64_t max_size) |
int | halide_error_buffer_extents_negative (void *user_context, const char *buffer_name, int dimension, int extent) |
int | halide_error_buffer_extents_too_large (void *user_context, const char *buffer_name, int64_t actual_size, int64_t max_size) |
int | halide_error_constraints_make_required_region_smaller (void *user_context, const char *buffer_name, int dimension, int constrained_min, int constrained_extent, int required_min, int required_extent) |
int | halide_error_constraint_violated (void *user_context, const char *var, int val, const char *constrained_var, int constrained_val) |
int | halide_error_param_too_small_i64 (void *user_context, const char *param_name, int64_t val, int64_t min_val) |
int | halide_error_param_too_small_u64 (void *user_context, const char *param_name, uint64_t val, uint64_t min_val) |
int | halide_error_param_too_small_f64 (void *user_context, const char *param_name, double val, double min_val) |
int | halide_error_param_too_large_i64 (void *user_context, const char *param_name, int64_t val, int64_t max_val) |
int | halide_error_param_too_large_u64 (void *user_context, const char *param_name, uint64_t val, uint64_t max_val) |
int | halide_error_param_too_large_f64 (void *user_context, const char *param_name, double val, double max_val) |
int | halide_error_out_of_memory (void *user_context) |
int | halide_error_buffer_argument_is_null (void *user_context, const char *buffer_name) |
int | halide_error_debug_to_file_failed (void *user_context, const char *func, const char *filename, int error_code) |
int | halide_error_unaligned_host_ptr (void *user_context, const char *func_name, int alignment) |
int | halide_error_host_is_null (void *user_context, const char *func_name) |
int | halide_error_bad_fold (void *user_context, const char *func_name, const char *var_name, const char *loop_name) |
int | halide_error_bad_extern_fold (void *user_context, const char *func_name, int dim, int min, int extent, int valid_min, int fold_factor) |
int | halide_error_fold_factor_too_small (void *user_context, const char *func_name, const char *var_name, int fold_factor, const char *loop_name, int required_extent) |
int | halide_error_requirement_failed (void *user_context, const char *condition, const char *message) |
int | halide_error_specialize_fail (void *user_context, const char *message) |
int | halide_error_no_device_interface (void *user_context) |
int | halide_error_device_interface_no_device (void *user_context) |
int | halide_error_host_and_device_dirty (void *user_context) |
int | halide_error_buffer_is_null (void *user_context, const char *routine) |
int | halide_error_device_dirty_with_no_device_support (void *user_context, const char *buffer_name) |
int | halide_error_storage_bound_too_small (void *user_context, const char *func_name, const char *var_name, int provided_size, int required_size) |
int | halide_error_device_crop_failed (void *user_context) |
int | halide_error_split_factor_not_positive (void *user_context, const char *func_name, const char *orig, const char *outer, const char *inner, const char *factor_str, int factor) |
int | halide_error_vscale_invalid (void *user_context, const char *func_name, int runtime_vscale, int compiletime_vscale) |
int | halide_can_use_target_features (int count, const uint64_t *features) |
This function is called internally by Halide in some situations to determine if the current execution environment can support the given set of halide_target_feature_t flags. | |
halide_can_use_target_features_t | halide_set_custom_can_use_target_features (halide_can_use_target_features_t) |
int | halide_default_can_use_target_features (int count, const uint64_t *features) |
This is the default implementation of halide_can_use_target_features; it is provided for convenience of user code that may wish to extend halide_can_use_target_features but continue providing existing support. | |
void | halide_register_argv_and_metadata (int(*filter_argv_call)(void **), const struct halide_filter_metadata_t *filter_metadata, const char *const *extra_key_value_pairs) |
halide_register_argv_and_metadata() is a user-defined function that must be provided in order to use the registration.cc files produced by Generators when the 'registration' output is requested. | |
struct | HALIDE_ATTRIBUTE_ALIGN (8) halide_profiler_func_stats |
The functions below here are relevant for pipelines compiled with the -profile target flag, which runs a sampling profiler thread alongside the pipeline. | |
struct halide_profiler_state * | halide_profiler_get_state (void) |
Get a pointer to the global profiler state for programmatic inspection. | |
struct halide_profiler_pipeline_stats * | halide_profiler_get_pipeline_state (const char *pipeline_name) |
Get a pointer to the pipeline state associated with pipeline_name. | |
int | halide_profiler_sample (struct halide_profiler_state *s, uint64_t *prev_t) |
Collects profiling information. | |
void | halide_profiler_reset (void) |
Reset profiler state cheaply. | |
void | halide_profiler_shutdown (void) |
Reset all profiler state. | |
void | halide_profiler_report (void *user_context) |
Print out timing statistics for everything run since the last reset. | |
void | halide_profiler_lock (struct halide_profiler_state *) |
These routines are called to temporarily disable and then reenable the profiler. | |
void | halide_profiler_unlock (struct halide_profiler_state *) |
"Float16" functions | |
These functions operate on bits (uint16_t) representing a half precision floating point number. | |
float | halide_float16_bits_to_float (uint16_t) |
Read bits representing a half precision floating point number and return the float that represents the same value. | |
double | halide_float16_bits_to_double (uint16_t) |
Read bits representing a half precision floating point number and return the double that represents the same value. | |
int | halide_reuse_device_allocations (void *user_context, bool) |
Tell Halide whether or not it is permitted to hold onto device allocations to service future requests instead of returning them eagerly to the underlying device API. | |
bool | halide_can_reuse_device_allocations (void *user_context) |
Determines whether on device_free the memory is returned immediately to the device API, or placed on a free list for future use. | |
void | halide_register_device_allocation_pool (struct halide_device_allocation_pool *) |
Register a callback to be informed when halide_reuse_device_allocations(false) is called, and all unused device allocations must be released. | |
This file declares the routines used by Halide internally in its runtime.
On platforms that support weak linking, these can be replaced with user-defined versions by defining an extern "C" function with the same name and signature.
When doing Just In Time (JIT) compilation, members of some_pipeline_or_func.jit_handlers() must be replaced instead. The corresponding methods are documented below.
All of these functions take a "void *user_context" parameter as their first argument; if the Halide kernel that calls back to any of these functions has been compiled with the UserContext feature set on its Target, then the value of that pointer passed from the code that calls the Halide kernel is piped through to the function.
Some of these are also useful to call when using the default implementation. E.g. halide_shutdown_thread_pool.
Note that even on platforms with weak linking, some linker setups may not respect the override you provide. E.g. if the override is in a shared library and the halide object files are linked directly into the output, the builtin versions of the runtime functions will be called. See your linker documentation for more details. On Linux, LD_DYNAMIC_WEAK=1 may help.
Definition in file HalideRuntime.h.
#define HALIDE_VERSION_MAJOR 19 |
Definition at line 26 of file HalideRuntime.h.
#define HALIDE_VERSION_MINOR 0 |
Definition at line 27 of file HalideRuntime.h.
#define HALIDE_VERSION_PATCH 0 |
Definition at line 28 of file HalideRuntime.h.
#define HALIDE_ALWAYS_INLINE inline __attribute__((always_inline)) |
Definition at line 49 of file HalideRuntime.h.
#define HALIDE_NEVER_INLINE __attribute__((noinline)) |
Definition at line 50 of file HalideRuntime.h.
#define HALIDE_MUST_USE_RESULT |
Definition at line 65 of file HalideRuntime.h.
#define HALIDE_FUNCTION_ATTRS |
Definition at line 76 of file HalideRuntime.h.
#define HALIDE_EXPORT_SYMBOL __attribute__((visibility("default"))) |
Definition at line 83 of file HalideRuntime.h.
#define HALIDE_ATTRIBUTE_ALIGN | ( | x | ) |
Definition at line 491 of file HalideRuntime.h.
#define HALIDE_ATTRIBUTE_DEPRECATED | ( | x | ) |
Definition at line 1705 of file HalideRuntime.h.
typedef void(* halide_print_t) (void *, const char *) |
Definition at line 170 of file HalideRuntime.h.
typedef void(* halide_error_handler_t) (void *, const char *) |
Definition at line 184 of file HalideRuntime.h.
typedef int(* halide_task_t) (void *user_context, int task_number, uint8_t *closure) |
Define halide_do_par_for to replace the default thread pool implementation.
halide_shutdown_thread_pool can also be called to release resources used by the default thread pool on platforms where it makes sense. See Func::set_custom_do_task and Func::set_custom_do_par_for. Should return zero if all the jobs return zero, or an arbitrarily chosen return value from one of the jobs otherwise.
Definition at line 229 of file HalideRuntime.h.
typedef int(* halide_do_par_for_t) (void *, halide_task_t, int, int, uint8_t *) |
Set a custom method for performing a parallel for loop.
Returns the old do_par_for handler.
Definition at line 238 of file HalideRuntime.h.
typedef int(* halide_semaphore_init_t) (struct halide_semaphore_t *, int) |
Definition at line 255 of file HalideRuntime.h.
typedef int(* halide_semaphore_release_t) (struct halide_semaphore_t *, int) |
Definition at line 256 of file HalideRuntime.h.
typedef bool(* halide_semaphore_try_acquire_t) (struct halide_semaphore_t *, int) |
Definition at line 257 of file HalideRuntime.h.
typedef int(* halide_loop_task_t) (void *user_context, int min, int extent, uint8_t *closure, void *task_parent) |
A task representing a serial for loop evaluated over some range.
Note that task_parent is a pass through argument that should be passed to any dependent tasks that are invoked using halide_do_parallel_tasks underneath this call.
Definition at line 263 of file HalideRuntime.h.
typedef int(* halide_do_task_t) (void *, halide_task_t, int, uint8_t *) |
If you use the default do_par_for, you can still set a custom handler to perform each individual task.
Returns the old handler.
Definition at line 330 of file HalideRuntime.h.
typedef int(* halide_do_loop_task_t) (void *, halide_loop_task_t, int, int, uint8_t *, void *) |
The version of do_task called for loop tasks.
By default calls the loop task with the same arguments.
Definition at line 339 of file HalideRuntime.h.
typedef int(* halide_do_parallel_tasks_t) (void *, int, struct halide_parallel_task_t *, void *task_parent) |
Provide an entire custom tasking runtime via function pointers.
Note that do_task and semaphore_try_acquire are only ever called by halide_default_do_par_for and halide_default_do_parallel_tasks, so it's only necessary to provide those if you are mixing in the default implementations of do_par_for and do_parallel_tasks.
Definition at line 352 of file HalideRuntime.h.
typedef void *(* halide_malloc_t) (void *, size_t) |
Definition at line 429 of file HalideRuntime.h.
typedef void(* halide_free_t) (void *, void *) |
Definition at line 430 of file HalideRuntime.h.
typedef void *(* halide_get_symbol_t) (const char *name) |
Definition at line 452 of file HalideRuntime.h.
typedef void *(* halide_load_library_t) (const char *name) |
Definition at line 453 of file HalideRuntime.h.
typedef void *(* halide_get_library_symbol_t) (void *lib, const char *name) |
Definition at line 454 of file HalideRuntime.h.
typedef enum halide_type_code_t halide_type_code_t |
Types in the halide type system.
They can be ints, unsigned ints, or floats (of various bit-widths), or a handle (which is always 64-bits). Note that the int/uint/float values do not imply a specific bit width (the bit width is expected to be encoded in a separate value).
typedef int32_t(* halide_trace_t) (void *user_context, const struct halide_trace_event_t *) |
Definition at line 671 of file HalideRuntime.h.
typedef enum halide_target_feature_t halide_target_feature_t |
Optional features a compilation Target can have.
Be sure to keep this in sync with the Feature enum in Target.h and the implementation of get_runtime_compatible_target in Target.cpp if you add a new feature.
typedef int(* halide_can_use_target_features_t) (int count, const uint64_t *features) |
Definition at line 1484 of file HalideRuntime.h.
typedef struct halide_dimension_t halide_dimension_t |
typedef struct halide_buffer_t halide_buffer_t |
The raw representation of an image passed around by generated Halide code.
It includes some stuff to track whether the image is not actually in main memory, but instead on a device (like a GPU). For a more convenient C++ wrapper, use Halide::Buffer<T>.
enum halide_type_code_t |
Types in the halide type system.
They can be ints, unsigned ints, or floats (of various bit-widths), or a handle (which is always 64-bits). Note that the int/uint/float values do not imply a specific bit width (the bit width is expected to be encoded in a separate value).
Definition at line 473 of file HalideRuntime.h.
Definition at line 574 of file HalideRuntime.h.
enum halide_error_code_t |
The error codes that may be returned by a Halide pipeline.
Enumerator | |
---|---|
halide_error_code_success | There was no error. This is the value returned by Halide on success. |
halide_error_code_generic_error | An uncategorized error occurred. Refer to the string passed to halide_error. |
halide_error_code_explicit_bounds_too_small | A Func was given an explicit bound via Func::bound, but this was not large enough to encompass the region that is used of the Func by the rest of the pipeline. |
halide_error_code_bad_type | The elem_size field of a halide_buffer_t does not match the size in bytes of the type of that ImageParam. Probable type mismatch. |
halide_error_code_access_out_of_bounds | A pipeline would access memory outside of the halide_buffer_t passed in. |
halide_error_code_buffer_allocation_too_large | A halide_buffer_t was given that spans more than 2GB of memory. |
halide_error_code_buffer_extents_too_large | A halide_buffer_t was given with extents that multiply to a number greater than 2^31-1. |
halide_error_code_constraints_make_required_region_smaller | Applying explicit constraints on the size of an input or output buffer shrank the size of that buffer below what will be accessed by the pipeline. |
halide_error_code_constraint_violated | A constraint on a size or stride of an input or output buffer was not met by the halide_buffer_t passed in. |
halide_error_code_param_too_small | A scalar parameter passed in was smaller than its minimum declared value. |
halide_error_code_param_too_large | A scalar parameter passed in was greater than its maximum declared value. |
halide_error_code_out_of_memory | A call to halide_malloc returned NULL. |
halide_error_code_buffer_argument_is_null | A halide_buffer_t pointer passed in was NULL. |
halide_error_code_debug_to_file_failed | debug_to_file failed to open or write to the specified file. |
halide_error_code_copy_to_host_failed | The Halide runtime encountered an error while trying to copy from device to host. Turn on -debug in your target string to see more details. |
halide_error_code_copy_to_device_failed | The Halide runtime encountered an error while trying to copy from host to device. Turn on -debug in your target string to see more details. |
halide_error_code_device_malloc_failed | The Halide runtime encountered an error while trying to allocate memory on device. Turn on -debug in your target string to see more details. |
halide_error_code_device_sync_failed | The Halide runtime encountered an error while trying to synchronize with a device. Turn on -debug in your target string to see more details. |
halide_error_code_device_free_failed | The Halide runtime encountered an error while trying to free a device allocation. Turn on -debug in your target string to see more details. |
halide_error_code_no_device_interface | Buffer has a non-zero device but no device interface, which violates a Halide invariant. |
halide_error_code_unimplemented | This part of the Halide runtime is unimplemented on this platform. |
halide_error_code_symbol_not_found | A runtime symbol could not be loaded. |
halide_error_code_internal_error | There is a bug in the Halide compiler. |
halide_error_code_device_run_failed | The Halide runtime encountered an error while trying to launch a GPU kernel. Turn on -debug in your target string to see more details. |
halide_error_code_unaligned_host_ptr | The Halide runtime encountered a host pointer that violated the alignment set for it by way of a call to set_host_alignment. |
halide_error_code_bad_fold | A fold_storage directive was used on a dimension that is not accessed in a monotonically increasing or decreasing fashion. |
halide_error_code_fold_factor_too_small | A fold_storage directive was used with a fold factor that was too small to store all the values of a producer needed by the consumer. |
halide_error_code_requirement_failed | User-specified require() expression was not satisfied. |
halide_error_code_buffer_extents_negative | At least one of the buffer's extents is negative. |
halide_error_code_gpu_device_error | Call(s) to a GPU backend API failed. |
halide_error_code_trace_failed | Failure recording trace packets for one of the halide_target_feature_trace features. |
halide_error_code_specialize_fail | A specialize_fail() schedule branch was selected at runtime. |
halide_error_code_device_wrap_native_failed | The Halide runtime encountered an error while trying to wrap a native device handle. Turn on -debug in your target string to see more details. |
halide_error_code_device_detach_native_failed | The Halide runtime encountered an error while trying to detach a native device handle. Turn on -debug in your target string to see more details. |
halide_error_code_host_is_null | The host field on an input or output was null, the device field was not zero, and the pipeline tries to use the buffer on the host. You may be passing a GPU-only buffer to a pipeline which is scheduled to use it on the CPU. |
halide_error_code_bad_extern_fold | A folded buffer was passed to an extern stage, but the region touched wraps around the fold boundary. |
halide_error_code_device_interface_no_device | Buffer has a non-null device_interface but device is 0, which violates a Halide invariant. |
halide_error_code_host_and_device_dirty | Buffer has both host and device dirty bits set, which violates a Halide invariant. |
halide_error_code_buffer_is_null | The halide_buffer_t * passed to a halide runtime routine is nullptr and this is not allowed. |
halide_error_code_device_buffer_copy_failed | The Halide runtime encountered an error while trying to copy from one buffer to another. Turn on -debug in your target string to see more details. |
halide_error_code_device_crop_unsupported | Attempted to make cropped/sliced alias of a buffer with a device field, but the device_interface does not support cropping. |
halide_error_code_device_crop_failed | Cropping/slicing a buffer failed for some other reason. Turn on -debug in your target string. |
halide_error_code_incompatible_device_interface | An operation on a buffer required an allocation on a particular device interface, but a device allocation already existed on a different device interface. Free the old one first. |
halide_error_code_bad_dimensions | The dimensions field of a halide_buffer_t does not match the dimensions of that ImageParam. |
halide_error_code_device_dirty_with_no_device_support | A buffer with the device_dirty flag set was passed to a pipeline compiled with no device backends enabled, so it doesn't know how to copy the data back from device memory to host memory. Either call copy_to_host before calling the Halide pipeline, or enable the appropriate device backend. |
halide_error_code_storage_bound_too_small | An explicit storage bound provided is too small to store all the values produced by the function. |
halide_error_code_split_factor_not_positive | A factor used to split a loop was discovered to be zero or negative at runtime. |
halide_error_code_vscale_invalid | The "vscale" value of the Scalable Vector detected at runtime does not match the vscale value used in compilation. |
halide_error_code_cannot_profile_pipeline | Profiling failed for a pipeline invocation. |
Definition at line 1070 of file HalideRuntime.h.
Optional features a compilation Target can have.
Be sure to keep this in sync with the Feature enum in Target.h and the implementation of get_runtime_compatible_target in Target.cpp if you add a new feature.
Enumerator | |
---|---|
halide_target_feature_jit | Generate code that will run immediately inside the calling process. |
halide_target_feature_debug | Turn on debug info and output for runtime code. |
halide_target_feature_no_asserts | Disable all runtime checks, for slightly tighter code. |
halide_target_feature_no_bounds_query | Disable the bounds querying functionality. |
halide_target_feature_sse41 | Use SSE 4.1 and earlier instructions. Only relevant on x86. |
halide_target_feature_avx | Use AVX 1 instructions. Only relevant on x86. |
halide_target_feature_avx2 | Use AVX 2 instructions. Only relevant on x86. |
halide_target_feature_fma | Enable x86 FMA instruction. |
halide_target_feature_fma4 | Enable x86 (AMD) FMA4 instruction set. |
halide_target_feature_f16c | Enable x86 16-bit float support. |
halide_target_feature_armv7s | Generate code for ARMv7s. Only relevant for 32-bit ARM. |
halide_target_feature_no_neon | Avoid using NEON instructions. Only relevant for 32-bit ARM. |
halide_target_feature_vsx | Use VSX instructions. Only relevant on POWERPC. |
halide_target_feature_power_arch_2_07 | Use POWER ISA 2.07 new instructions. Only relevant on POWERPC. |
halide_target_feature_cuda | Enable the CUDA runtime. Defaults to compute capability 2.0 (Fermi) |
halide_target_feature_cuda_capability30 | Enable CUDA compute capability 3.0 (Kepler) |
halide_target_feature_cuda_capability32 | Enable CUDA compute capability 3.2 (Tegra K1) |
halide_target_feature_cuda_capability35 | Enable CUDA compute capability 3.5 (Kepler) |
halide_target_feature_cuda_capability50 | Enable CUDA compute capability 5.0 (Maxwell) |
halide_target_feature_cuda_capability61 | Enable CUDA compute capability 6.1 (Pascal) |
halide_target_feature_cuda_capability70 | Enable CUDA compute capability 7.0 (Volta) |
halide_target_feature_cuda_capability75 | Enable CUDA compute capability 7.5 (Turing) |
halide_target_feature_cuda_capability80 | Enable CUDA compute capability 8.0 (Ampere) |
halide_target_feature_cuda_capability86 | Enable CUDA compute capability 8.6 (Ampere) |
halide_target_feature_opencl | Enable the OpenCL runtime. |
halide_target_feature_cl_doubles | Enable double support on OpenCL targets. |
halide_target_feature_cl_atomic64 | Enable 64-bit atomic operations on OpenCL targets. |
halide_target_feature_user_context | Generated code takes a user_context pointer as first argument. |
halide_target_feature_profile | Launch a sampling profiler alongside the Halide pipeline that monitors and reports the runtime used by each Func. |
halide_target_feature_no_runtime | Do not include a copy of the Halide runtime in any generated object file or assembly. |
halide_target_feature_metal | Enable the (Apple) Metal runtime. |
halide_target_feature_c_plus_plus_mangling | Generate C++ mangled names for result function, et al. |
halide_target_feature_large_buffers | Enable 64-bit buffer indexing to support buffers > 2GB. Ignored if bits != 64. |
halide_target_feature_hvx_128 | Enable HVX 128 byte mode. |
halide_target_feature_hvx_v62 | Enable Hexagon v62 architecture. |
halide_target_feature_fuzz_float_stores | On every floating point store, set the last bit of the mantissa to zero. Pipelines for which the output is very different with this feature enabled may also produce very different output on different processors. |
halide_target_feature_soft_float_abi | Enable soft float ABI. This only enables the soft float ABI calling convention, which does not necessarily use soft floats. |
halide_target_feature_msan | Enable hooks for MSAN support. |
halide_target_feature_avx512 | Enable the base AVX512 subset supported by all AVX512 architectures. The specific feature sets are AVX-512F and AVX512-CD. See https://en.wikipedia.org/wiki/AVX-512 for a description of each AVX subset. |
halide_target_feature_avx512_knl | Enable the AVX512 features supported by Knight's Landing chips, such as the Xeon Phi x200. This includes the base AVX512 set, and also AVX512-CD and AVX512-ER. |
halide_target_feature_avx512_skylake | Enable the AVX512 features supported by Skylake Xeon server processors. This adds AVX512-VL, AVX512-BW, and AVX512-DQ to the base set. The main difference from the base AVX512 set is better support for small integer ops. Note that this does not include the Knight's Landing features. Note also that these features are not available on Skylake desktop and mobile processors. |
halide_target_feature_avx512_cannonlake | Enable the AVX512 features expected to be supported by future Cannonlake processors. This includes all of the Skylake features, plus AVX512-IFMA and AVX512-VBMI. |
halide_target_feature_avx512_zen4 | Enable the AVX512 features supported by Zen4 processors. This includes all of the Cannonlake features, plus AVX512-VNNI, AVX512-BF16, and more. |
halide_target_feature_avx512_sapphirerapids | Enable the AVX512 features supported by Sapphire Rapids processors. This includes all of the Zen4 features, plus AVX-VNNI and AMX instructions. |
halide_target_feature_trace_loads | Trace all loads done by the pipeline. Equivalent to calling Func::trace_loads on every non-inlined Func. |
halide_target_feature_trace_stores | Trace all stores done by the pipeline. Equivalent to calling Func::trace_stores on every non-inlined Func. |
halide_target_feature_trace_realizations | Trace all realizations done by the pipeline. Equivalent to calling Func::trace_realizations on every non-inlined Func. |
halide_target_feature_trace_pipeline | Trace the pipeline. |
halide_target_feature_hvx_v65 | Enable Hexagon v65 architecture. |
halide_target_feature_hvx_v66 | Enable Hexagon v66 architecture. |
halide_target_feature_hvx_v68 | Enable Hexagon v68 architecture. |
halide_target_feature_cl_half | Enable half support on OpenCL targets. |
halide_target_feature_strict_float | Turn off all non-IEEE floating-point optimization. Currently applies only to LLVM targets. |
halide_target_feature_tsan | Enable hooks for TSAN support. |
halide_target_feature_asan | Enable hooks for ASAN support. |
halide_target_feature_d3d12compute | Enable Direct3D 12 Compute runtime. |
halide_target_feature_check_unsafe_promises | Insert assertions for promises. |
halide_target_feature_hexagon_dma | Enable Hexagon DMA buffers. |
halide_target_feature_embed_bitcode | Emulate clang -fembed-bitcode flag. |
halide_target_feature_enable_llvm_loop_opt | Enable loop vectorization + unrolling in LLVM. Overrides halide_target_feature_disable_llvm_loop_opt. (Ignored for non-LLVM targets.) |
halide_target_feature_wasm_mvponly | Disable all extensions to WebAssembly codegen (including +sign-ext and +nontrapping-fptoint, which are on by default). |
halide_target_feature_wasm_simd128 | Enable +simd128 instructions for WebAssembly codegen. |
halide_target_feature_wasm_threads | Enable use of threads in WebAssembly codegen. Requires the use of a wasm runtime that provides pthread-compatible wrappers (typically, Emscripten with the -pthreads flag). Unsupported under WASI. |
halide_target_feature_wasm_bulk_memory | Enable +bulk-memory instructions for WebAssembly codegen. |
halide_target_feature_webgpu | Enable the WebGPU runtime. |
halide_target_feature_sve | Enable ARM Scalable Vector Extensions. |
halide_target_feature_sve2 | Enable ARM Scalable Vector Extensions v2. |
halide_target_feature_egl | Force use of EGL support. |
halide_target_feature_arm_dot_prod | Enable ARMv8.2-a dotprod extension (i.e. udot and sdot instructions) |
halide_target_feature_arm_fp16 | Enable ARMv8.2-a half-precision floating point data processing. |
halide_llvm_large_code_model | Use the LLVM large code model to compile. |
halide_target_feature_rvv | Enable RISCV "V" Vector Extension. |
halide_target_feature_armv8a | Enable ARMv8a instructions. |
halide_target_feature_armv81a | Enable ARMv8.1a instructions. |
halide_target_feature_armv82a | Enable ARMv8.2a instructions. |
halide_target_feature_armv83a | Enable ARMv8.3a instructions. |
halide_target_feature_armv84a | Enable ARMv8.4a instructions. |
halide_target_feature_armv85a | Enable ARMv8.5a instructions. |
halide_target_feature_armv86a | Enable ARMv8.6a instructions. |
halide_target_feature_armv87a | Enable ARMv8.7a instructions. |
halide_target_feature_armv88a | Enable ARMv8.8a instructions. |
halide_target_feature_armv89a | Enable ARMv8.9a instructions. |
halide_target_feature_sanitizer_coverage | Enable hooks for SanitizerCoverage support. |
halide_target_feature_profile_by_timer | Alternative to halide_target_feature_profile using timer interrupt for systems without threads or applications that need to avoid them. |
halide_target_feature_spirv | Enable SPIR-V code generation support. |
halide_target_feature_vulkan | Enable Vulkan runtime support. |
halide_target_feature_vulkan_int8 | Enable Vulkan 8-bit integer support. |
halide_target_feature_vulkan_int16 | Enable Vulkan 16-bit integer support. |
halide_target_feature_vulkan_int64 | Enable Vulkan 64-bit integer support. |
halide_target_feature_vulkan_float16 | Enable Vulkan 16-bit float support. |
halide_target_feature_vulkan_float64 | Enable Vulkan 64-bit float support. |
halide_target_feature_vulkan_version10 | Enable Vulkan v1.0 runtime target support. |
halide_target_feature_vulkan_version12 | Enable Vulkan v1.2 runtime target support. |
halide_target_feature_vulkan_version13 | Enable Vulkan v1.3 runtime target support. |
halide_target_feature_semihosting | Used together with Target::NoOS for the baremetal target built with semihosting library and run with semihosting mode where minimum I/O communication with a host PC is available. |
halide_target_feature_avx10_1 | Intel AVX10 version 1 support. vector_bits is used to indicate width. |
halide_target_feature_x86_apx | Intel x86 APX support. Covers the initial set of features released as APX: egpr, push2pop2, ppx, ndd. |
halide_target_feature_end | A sentinel. Every target is considered to have this feature, and setting this feature does nothing. |
Definition at line 1353 of file HalideRuntime.h.
enum halide_buffer_flags |
Enumerator | |
---|---|
halide_buffer_flag_host_dirty | |
halide_buffer_flag_device_dirty |
Definition at line 1538 of file HalideRuntime.h.
Enumerator | |
---|---|
halide_argument_kind_input_scalar | |
halide_argument_kind_input_buffer | |
halide_argument_kind_output_buffer |
Definition at line 1740 of file HalideRuntime.h.
|
extern |
Print a message to stderr.
Main use is to support tracing functionality, print, and print_when calls. Also called by the default halide_error. This function can be replaced in JITed code by using halide_custom_print and providing an implementation of halide_print in AOT code. See Func::set_custom_print.
|
extern |
|
extern |
|
extern |
Halide calls this function on runtime errors (for example bounds checking failures).
This function can be replaced in JITed code by using Func::set_error_handler, or in AOT code by calling halide_set_error_handler. In AOT code on platforms that support weak linking (i.e. not Windows), you can also override it by simply defining your own halide_error.
Referenced by Halide::Runtime::Internal::PrinterBase::allocation_error(), Halide::Runtime::Internal::MemoryArena::create(), Halide::Runtime::Internal::StringStorage::create(), halide_set_num_threads(), Halide::Runtime::Internal::MemoryArena::reclaim(), Halide::Runtime::Internal::MemoryArena::reserve(), and Halide::Runtime::Internal::StringStorage::terminate().
|
extern |
|
extern |
|
extern |
A basic set of mutex and condition variable functions, which call platform specific code for mutual exclusion.
Equivalent to posix calls.
Definition at line 874 of file synchronization_common.h.
References Halide::Runtime::Internal::Synchronization::fast_mutex::lock().
Referenced by halide_default_do_par_for(), halide_default_do_parallel_tasks(), halide_default_semaphore_release(), halide_mutex_array_lock(), halide_set_num_threads(), halide_shutdown_thread_pool(), Halide::Runtime::Internal::ScopedMutexLock::ScopedMutexLock(), Halide::Runtime::Internal::worker_thread(), and Halide::Runtime::Internal::worker_thread_already_locked().
|
extern |
Definition at line 880 of file synchronization_common.h.
References Halide::Runtime::Internal::Synchronization::fast_mutex::unlock().
Referenced by halide_default_do_par_for(), halide_default_do_parallel_tasks(), halide_default_semaphore_release(), halide_mutex_array_unlock(), halide_set_num_threads(), halide_shutdown_thread_pool(), Halide::Runtime::Internal::worker_thread(), Halide::Runtime::Internal::worker_thread_already_locked(), and Halide::Runtime::Internal::ScopedMutexLock::~ScopedMutexLock().
|
extern |
Definition at line 892 of file synchronization_common.h.
References Halide::Runtime::Internal::Synchronization::fast_cond::signal().
|
extern |
Definition at line 886 of file synchronization_common.h.
References Halide::Runtime::Internal::Synchronization::fast_cond::broadcast().
Referenced by Halide::Runtime::Internal::enqueue_work_already_locked(), halide_default_semaphore_release(), halide_shutdown_thread_pool(), and Halide::Runtime::Internal::worker_thread_already_locked().
|
extern |
Definition at line 898 of file synchronization_common.h.
References Halide::Runtime::Internal::Synchronization::fast_cond::wait().
Referenced by Halide::Runtime::Internal::worker_thread_already_locked().
|
extern |
Definition at line 911 of file synchronization_common.h.
References halide_mutex_array::array, halide_free(), halide_malloc(), and memset().
|
extern |
Definition at line 931 of file synchronization_common.h.
References halide_mutex_array::array, and halide_free().
|
extern |
Definition at line 937 of file synchronization_common.h.
References halide_mutex_array::array, halide_error_code_success, and halide_mutex_lock().
|
extern |
Definition at line 942 of file synchronization_common.h.
References halide_mutex_array::array, halide_error_code_success, and halide_mutex_unlock().
|
extern |
Definition at line 799 of file thread_pool_common.h.
References Halide::Runtime::Internal::custom_do_par_for.
|
extern |
Definition at line 696 of file thread_pool_common.h.
References halide_cond_broadcast(), halide_join_thread(), halide_mutex_lock(), halide_mutex_unlock(), Halide::Runtime::Internal::work_queue_t::initialized, Halide::Runtime::Internal::work_queue_t::mutex, Halide::Runtime::Internal::work_queue_t::reset(), Halide::Runtime::Internal::work_queue_t::shutdown, Halide::Runtime::Internal::work_queue_t::threads, Halide::Runtime::Internal::work_queue_t::threads_created, Halide::Runtime::Internal::work_queue_t::wake_a_team, Halide::Runtime::Internal::work_queue_t::wake_b_team, Halide::Runtime::Internal::work_queue_t::wake_owners, and Halide::Runtime::Internal::work_queue.
|
extern |
Definition at line 770 of file thread_pool_common.h.
References Halide::Runtime::Internal::custom_do_par_for.
|
extern |
Definition at line 815 of file thread_pool_common.h.
References Halide::Runtime::Internal::custom_semaphore_init.
|
extern |
Definition at line 819 of file thread_pool_common.h.
References Halide::Runtime::Internal::custom_semaphore_release.
|
extern |
Definition at line 823 of file thread_pool_common.h.
References Halide::Runtime::Internal::custom_semaphore_try_acquire.
|
extern |
Enqueue some number of the tasks described above and wait for them to complete.
While waiting, the calling thread assists with either the tasks enqueued, or other non-blocking tasks in the task system. Note that task_parent should be NULL for top-level calls and the pass through argument if this call is being made from another task.
Definition at line 809 of file thread_pool_common.h.
References Halide::Runtime::Internal::custom_do_parallel_tasks.
|
extern |
Definition at line 758 of file thread_pool_common.h.
References Halide::Runtime::Internal::custom_do_task.
|
extern |
Definition at line 794 of file thread_pool_common.h.
References Halide::Runtime::Internal::custom_do_task.
Referenced by Halide::Runtime::Internal::worker_thread_already_locked().
|
extern |
Definition at line 764 of file thread_pool_common.h.
References Halide::Runtime::Internal::custom_do_loop_task.
|
extern |
Definition at line 804 of file thread_pool_common.h.
References Halide::Runtime::Internal::custom_do_loop_task.
Referenced by Halide::Runtime::Internal::worker_thread_already_locked().
|
extern |
Definition at line 776 of file thread_pool_common.h.
References Halide::Runtime::Internal::custom_do_loop_task, Halide::Runtime::Internal::custom_do_par_for, Halide::Runtime::Internal::custom_do_parallel_tasks, Halide::Runtime::Internal::custom_do_task, Halide::Runtime::Internal::custom_semaphore_init, Halide::Runtime::Internal::custom_semaphore_release, and Halide::Runtime::Internal::custom_semaphore_try_acquire.
|
extern |
The default versions of the parallel runtime functions.
Definition at line 607 of file thread_pool_common.h.
References Halide::Runtime::Internal::work::active_workers, halide_parallel_task_t::closure, Halide::Runtime::Internal::enqueue_work_already_locked(), Halide::Runtime::Internal::work::exit_status, halide_parallel_task_t::extent, halide_parallel_task_t::fn, halide_error_code_success, halide_mutex_lock(), halide_mutex_unlock(), halide_parallel_task_t::min, halide_parallel_task_t::min_threads, Halide::Runtime::Internal::work_queue_t::mutex, halide_parallel_task_t::name, Halide::Runtime::Internal::work::next_semaphore, halide_parallel_task_t::num_semaphores, Halide::Runtime::Internal::work::owner_is_sleeping, Halide::Runtime::Internal::work::parent_job, halide_parallel_task_t::semaphores, halide_parallel_task_t::serial, Halide::Runtime::Internal::work::sibling_count, Halide::Runtime::Internal::work::siblings, Halide::Runtime::Internal::work::task, Halide::Runtime::Internal::work::task_fn, Halide::Runtime::Internal::work::user_context, Halide::Runtime::Internal::work_queue, and Halide::Runtime::Internal::worker_thread_already_locked().
|
extern |
Definition at line 639 of file thread_pool_common.h.
References Halide::Runtime::Internal::work::active_workers, Halide::Runtime::Internal::enqueue_work_already_locked(), Halide::Runtime::Internal::work::exit_status, halide_parallel_task_t::extent, halide_error_code_success, halide_mutex_lock(), halide_mutex_unlock(), Halide::Runtime::Internal::work_queue_t::mutex, Halide::Runtime::Internal::work::next_semaphore, Halide::Runtime::Internal::work::owner_is_sleeping, Halide::Runtime::Internal::work::parent_job, Halide::Runtime::Internal::work::task, Halide::Runtime::Internal::work::task_fn, Halide::Runtime::Internal::work::user_context, Halide::Runtime::Internal::work_queue, and Halide::Runtime::Internal::worker_thread_already_locked().
|
extern |
Definition at line 596 of file thread_pool_common.h.
|
extern |
Definition at line 601 of file thread_pool_common.h.
|
extern |
Definition at line 722 of file thread_pool_common.h.
References halide_semaphore_impl_t::value.
|
extern |
Definition at line 728 of file thread_pool_common.h.
References halide_cond_broadcast(), halide_mutex_lock(), halide_mutex_unlock(), Halide::Runtime::Internal::work_queue_t::mutex, halide_semaphore_impl_t::value, Halide::Runtime::Internal::work_queue_t::wake_a_team, Halide::Runtime::Internal::work_queue_t::wake_owners, and Halide::Runtime::Internal::work_queue.
|
extern |
Definition at line 742 of file thread_pool_common.h.
References halide_semaphore_impl_t::value.
Referenced by Halide::Runtime::Internal::work::make_runnable().
|
extern |
Spawn a thread.
Returns a handle to the thread for the purposes of joining it. The thread must be joined in order to clean up any resources associated with it.
Referenced by Halide::Runtime::Internal::enqueue_work_already_locked().
|
extern |
Join a thread.
Referenced by halide_shutdown_thread_pool().
|
extern |
Set the number of threads used by Halide's thread pool.
Returns the old number.
n < 0 : error condition
n == 0 : use a reasonable system default (typically, number of cpus online).
n == 1 : use exactly one thread; this will always enforce serial execution
n > 1 : use a pool of exactly n threads.
(Note that this is only guaranteed when using the default implementations of halide_do_par_for(); custom implementations may completely ignore values passed to halide_set_num_threads().)
Definition at line 679 of file thread_pool_common.h.
References Halide::Runtime::Internal::clamp_num_threads(), Halide::Runtime::Internal::default_desired_num_threads(), Halide::Runtime::Internal::work_queue_t::desired_threads_working, halide_error(), halide_mutex_lock(), halide_mutex_unlock(), Halide::Runtime::Internal::work_queue_t::mutex, and Halide::Runtime::Internal::work_queue.
|
extern |
Halide calls these functions to allocate and free memory.
To replace in AOT code, use the halide_set_custom_malloc and halide_set_custom_free, or (on platforms that support weak linking), simply define these functions yourself. In JIT-compiled code use Func::set_custom_allocator.
If you override them, and find yourself wanting to call the default implementation from within your override, use halide_default_malloc/free.
Note that halide_malloc must return a pointer aligned to the maximum meaningful alignment for the platform for the purpose of vector loads and stores, and with an allocated size that is (at least) an integral multiple of that same alignment. The default implementation uses 32-byte alignment on arm and 64-byte alignment on x86. Additionally, it must be safe to read at least 8 bytes before the start and beyond the end.
Referenced by halide_mutex_array_create().
|
extern |
Referenced by halide_mutex_array_create(), and halide_mutex_array_destroy().
|
extern |
|
extern |
|
extern |
|
extern |
|
extern |
Halide calls these functions to interact with the underlying system runtime functions.
To replace in AOT code on platforms that support weak linking, define these functions yourself, or use the halide_set_custom_load_library() and halide_set_custom_get_library_symbol() functions. In JIT-compiled code, use JITSharedRuntime::set_default_handlers().
halide_load_library and halide_get_library_symbol are equivalent to dlopen and dlsym. halide_get_symbol(sym) is equivalent to dlsym(RTLD_DEFAULT, sym).
|
extern |
|
extern |
|
extern |
|
extern |
|
extern |
|
extern |
|
extern |
|
extern |
|
extern |
Called when debug_to_file is used inside Halide code.
See Func::debug_to_file for how this is called
Cannot be replaced in JITted code at present.
|
extern |
Called when Funcs are marked as trace_load, trace_store, or trace_realization.
See Func::set_custom_trace. The default implementation either prints events via halide_print, or if HL_TRACE_FILE is defined, dumps the trace to that file in a sequence of trace packets. The header for a trace packet is defined below. If the trace is going to be large, you may want to make the file a named pipe, and then read from that pipe into gzip.
halide_trace returns a unique ID which will be passed to future events that "belong" to the earlier event as the parent id. The ownership hierarchy looks like:
begin_pipeline
+--trace_tag (if any)
+--trace_tag (if any)
...
+--begin_realization
|  +--produce
|  |  +--load/store
|  |  +--end_produce
|  +--consume
|  |  +--load
|  |  +--end_consume
|  +--end_realization
+--end_pipeline
Threading means that ownership cannot be inferred from the ordering of events. There can be many active realizations of a given function, or many active productions for a single realization. Within a single production, the ordering of events is meaningful.
Note that all trace_tag events (if any) will occur just after the begin_pipeline event, but before any begin_realization events. All trace_tags for a given Func will be emitted in the order added.
|
extern |
|
extern |
|
extern |
Set the file descriptor that Halide should write binary trace events to.
If called with 0 as the argument, Halide outputs trace information to stdout in a human-readable format. If never called, Halide checks for the existence of an environment variable called HL_TRACE_FILE and opens that file. If HL_TRACE_FILE is not defined, it outputs trace information to stdout in a human-readable format.
|
extern |
Halide calls this to retrieve the file descriptor to write binary trace events to.
The default implementation returns the value set by halide_set_trace_file. Implement it yourself if you wish to use a custom file descriptor per user_context. Return zero from your implementation to tell Halide to print human-readable trace information to stdout.
|
extern |
If tracing is writing to a file, this call closes that file (flushing the trace).
Returns zero on success.
|
extern |
Release all data associated with the given device interface, in particular all resources (memory, texture, context handles) allocated by Halide.
Must be called explicitly when using AOT compilation. This is not thread-safe with respect to actively running Halide code. Ensure all pipelines are finished before calling this.
|
extern |
Copy image data from device memory to host memory.
This must be called explicitly to copy back the results of a GPU-based filter.
|
extern |
Copy image data from host memory to device memory.
This should not be called directly; Halide handles copying to the device automatically. If interface is NULL and the buf has a non-zero dev field, the device associated with the dev handle will be used. Otherwise if the dev field is 0 and interface is NULL, an error is returned.
|
extern |
Copy data from one buffer to another.
The buffers may have different shapes and sizes, but the destination buffer's shape must be contained within the source buffer's shape. That is, for each dimension, the min on the destination buffer must be greater than or equal to the min on the source buffer, and min+extent on the destination buffer must be less than or equal to min+extent on the source buffer. The source data is pulled from either device or host memory on the source, depending on the dirty flags. host is preferred if both are valid. The dst_device_interface parameter controls the destination memory space. NULL means host memory.
|
extern |
Give the destination buffer a device allocation which is an alias for the same coordinate range in the source buffer.
Modifies the device, device_interface, and the device_dirty flag only. Only supported by some device APIs (others will return halide_error_code_device_crop_unsupported). Call halide_device_release_crop instead of halide_device_free to clean up resources associated with the cropped view. Do not free the device allocation on the source buffer while the destination buffer still lives. Note that the two buffers do not share dirty flags, so care must be taken to update them together as needed. Note that src and dst are required to have the same number of dimensions.
Note also that (in theory) device interfaces which support cropping may still not support cropping a crop (instead, create a new crop of the parent buffer); in practice, no known implementation has this limitation, although it is possible that some future implementations may require it.
|
extern |
Give the destination buffer a device allocation which is an alias for a similar coordinate range in the source buffer, but with one dimension sliced away in the dst.
Modifies the device, device_interface, and the device_dirty flag only. Only supported by some device APIs (others will return halide_error_code_device_crop_unsupported). Call halide_device_release_crop instead of halide_device_free to clean up resources associated with the sliced view. Do not free the device allocation on the source buffer while the destination buffer still lives. Note that the two buffers do not share dirty flags, so care must be taken to update them together as needed. Note that the dst buffer must have exactly one fewer dimension than the src buffer, and that slice_dim and slice_pos must be valid within src.
|
extern |
Release any resources associated with a cropped/sliced view of another buffer.
|
extern |
Wait for current GPU operations to complete.
Calling this explicitly should rarely be necessary, except maybe for profiling.
|
extern |
Wait for current GPU operations to complete.
Calling this explicitly should rarely be necessary, except maybe for profiling. This variant is useful when synchronization is desired without specifying any buffer to synchronize on. Calling this with a null device_interface is always illegal.
|
extern |
Allocate device memory to back a halide_buffer_t.
|
extern |
Free device memory.
|
extern |
Wrap or detach a native device handle, setting the device field and device_interface field as appropriate for the given GPU API.
The meaning of the opaque handle is specific to the device interface, so if you know the device interface in use, call the more specific functions in the runtime headers for your specific device API instead (e.g. HalideRuntimeCuda.h).
|
extern |
|
extern |
|
extern |
Halide calls this to get the desired halide gpu device setting.
Implement this yourself to use a different gpu device per user_context. The default implementation returns the value set by halide_set_gpu_device, or the environment variable HL_GPU_DEVICE.
Definition at line 53 of file HalidePyTorchCudaHelpers.h.
References Halide::PyTorch::UserContext::device_id.
|
extern |
Set the soft maximum amount of memory, in bytes, that the LRU cache will use to memoize Func results.
This is not a strict maximum in that concurrency and simultaneous use of memoized results larger than the cache size can both cause it to temporarily be larger than the size specified here.
|
extern |
Given a cache key for a memoized result, currently constructed from the Func name and top-level Func name plus the arguments of the computation, determine if the result is in the cache and return it if so.
(The internals of the cache key should be considered opaque by this function.) If this routine returns true, it is a cache miss. Otherwise, it will return false and the buffers passed in will be filled, via copying, with memoized data. The last argument is a list of halide_buffer_t pointers which represents the outputs of the memoized Func. If the Func does not return a Tuple, there will only be one halide_buffer_t in the list. The tuple_count parameter determines the length of the list.
The return values are: -1: Signals an error. 0: Success and cache hit. 1: Success and cache miss.
|
extern |
Given a cache key for a memoized result, currently constructed from the Func name and top-level Func name plus the arguments of the computation, store the result in the cache for future access by halide_memoization_cache_lookup.
(The internals of the cache key should be considered opaque by this function.) Data is copied out from the inputs and inputs are unmodified. The last argument is a list of halide_buffer_t pointers which represents the outputs of the memoized Func. If the Func does not return a Tuple, there will only be one halide_buffer_t in the list. The tuple_count parameter determines the length of the list.
If there is a memory allocation failure, the store does not store the data into the cache.
If has_eviction_key is true, the entry is marked with eviction_key to allow removing the key with halide_memoization_cache_evict.
|
extern |
Evict all cache entries that were tagged with the given eviction_key in the memoize scheduling directive.
|
extern |
If halide_memoization_cache_lookup succeeds, halide_memoization_cache_release must be called to signal the storage is no longer being used by the caller.
It will be passed the host pointer of one of the buffers returned by halide_memoization_cache_lookup. That is, halide_memoization_cache_release will be called multiple times for the case where halide_memoization_cache_lookup is handling multiple buffers. (This corresponds to memoizing a Tuple in Halide.) Note that the host pointer must be sufficient to get to all information the release operation needs. The default Halide cache implementation accomplishes this by storing extra data before the start of the user modifiable host storage.
This call is like free and does not have a failure return.
|
extern |
Free all memory and resources associated with the memoization cache.
Must be called at a time when no other threads are accessing the cache.
|
extern |
Verify that a given range of memory has been initialized; only used when Target::MSAN is enabled.
The default implementation simply calls the LLVM-provided __msan_check_mem_is_initialized() function.
The return value should always be zero.
|
extern |
Verify that the data pointed to by the halide_buffer_t is initialized (but not the halide_buffer_t itself), using halide_msan_check_memory_is_initialized() for checking.
The default implementation takes pains to only check the active memory ranges (skipping padding), and sorting into ranges to always check the smallest number of ranges, in monotonically increasing memory order.
Most client code should never need to replace the default implementation.
The return value should always be zero.
|
extern |
Annotate that a given range of memory has been initialized; only used when Target::MSAN is enabled.
The default implementation simply calls the LLVM-provided __msan_unpoison() function.
The return value should always be zero.
Referenced by Halide::Runtime::Internal::PrinterBase::str().
|
extern |
Mark the data pointed to by the halide_buffer_t as initialized (but not the halide_buffer_t itself), using halide_msan_annotate_memory_is_initialized() for marking.
The default implementation takes pains to only mark the active memory ranges (skipping padding), and sorting into ranges to always mark the smallest number of ranges, in monotonically increasing memory order.
Most client code should never need to replace the default implementation.
The return value should always be zero.
|
extern |
|
extern |
Halide calls the functions below on various error conditions.
The default implementations construct an error message, call halide_error, then return the matching error code above. On platforms that support weak linking, you can override these to catch the errors individually. A call into an extern stage for the purposes of bounds inference failed. Returns the error code given by the extern stage.
|
extern |
A call to an extern stage failed.
Returns the error code given by the extern stage.
|
extern |
Various other error conditions.
See the enum above for a description of each.
|
extern |
|
extern |
|
extern |
|
extern |
|
extern |
|
extern |
|
extern |
|
extern |
|
extern |
|
extern |
|
extern |
|
extern |
|
extern |
|
extern |
|
extern |
|
extern |
|
extern |
|
extern |
|
extern |
|
extern |
|
extern |
|
extern |
|
extern |
|
extern |
|
extern |
|
extern |
|
extern |
|
extern |
|
extern |
|
extern |
|
extern |
|
extern |
|
extern |
|
extern |
This function is called internally by Halide in some situations to determine if the current execution environment can support the given set of halide_target_feature_t flags.
The implementation must do the following:
– If there are flags set in features that the function knows cannot be supported, return 0. – Otherwise, return 1. – Note that any flags set in features that the function doesn't know how to test should be ignored; this implies that a return value of 1 means "not known to be bad" rather than "known to be good".
In other words: a return value of 0 means "It is not safe to use code compiled with these features", while a return value of 1 means "It is not obviously unsafe to use code compiled with these features".
The default implementation simply calls halide_default_can_use_target_features.
Note that features points to an array of count uint64_t; this array must contain enough bits to represent all the currently known features. Any excess bits must be set to zero.
|
extern |
|
extern |
This is the default implementation of halide_can_use_target_features; it is provided for convenience of user code that may wish to extend halide_can_use_target_features but continue providing existing support, e.g.
int halide_can_use_target_features(int count, const uint64_t *features) { if (features[halide_target_somefeature >> 6] & (1LL << (halide_target_somefeature & 63))) { if (!can_use_somefeature()) { return 0; } } return halide_default_can_use_target_features(count, features); }
void halide_register_argv_and_metadata | ( | int(* | filter_argv_call )(void **), |
const struct halide_filter_metadata_t * | filter_metadata, | ||
const char *const * | extra_key_value_pairs ) |
halide_register_argv_and_metadata() is a user-defined function that must be provided in order to use the registration.cc files produced by Generators when the 'registration' output is requested.
Each registration.cc file provides a static initializer that calls this function with the given filter's argv-call variant, its metadata, and (optionally) any additional textual data that the build system chooses to tack on for its own purposes. Note that this will be called at static-initializer time (i.e., before main() is called), and in an unpredictable order. Note that extra_key_value_pairs may be nullptr; if it's not null, it's expected to be a null-terminated list of strings, with an even number of entries.
struct HALIDE_ATTRIBUTE_ALIGN | ( | 8 | ) |
The functions below here are relevant for pipelines compiled with the -profile target flag, which runs a sampling profiler thread alongside the pipeline.
Per-invocation-of-a-pipeline state.
Per-pipeline state tracked by the sampling profiler.
Per-Func state tracked by the sampling profiler.
These exist in a linked list.
Lives on the stack of the Halide code. Exists in a doubly-linked list so that it can be cleanly removed.
Definition at line 1825 of file HalideRuntime.h.
|
extern |
Get a pointer to the global profiler state for programmatic inspection.
Lock it before using to pause the profiler.
|
extern |
Get a pointer to the pipeline state associated with pipeline_name.
This function grabs the global profiler state's lock on entry.
|
extern |
Collects profiling information.
Intended to be called from a timer interrupt handler if timer based profiling is being used. State argument is acquired via halide_profiler_get_pipeline_state. prev_t argument is the previous time and can be used to set a more accurate time interval if desired.
|
extern |
Reset profiler state cheaply.
May leave threads running or some memory allocated but all accumulated statistics are reset. Blocks until all running profiled Halide pipelines exit.
|
extern |
Reset all profiler state.
Blocks until all running profiled Halide pipelines exit.
|
extern |
Print out timing statistics for everything run since the last reset.
Also happens at process exit.
|
extern |
These routines are called to temporarily disable and then reenable the profiler.
|
extern |
|
extern |
Read bits representing a half precision floating point number and return the float that represents the same value.
|
extern |
Read bits representing a half precision floating point number and return the double that represents the same value.
Referenced by Halide::Runtime::Internal::PrinterBase::operator<<().
|
extern |
Tell Halide whether or not it is permitted to hold onto device allocations to service future requests instead of returning them eagerly to the underlying device API.
Many device allocators are quite slow, so it can be beneficial to set this to true. The default value for now is false.
Note that if enabled, the eviction policy is very simplistic. The 32 most-recently used allocations are preserved, regardless of their size. Additionally, if a call to cuMalloc results in an out-of-memory error, the entire cache is flushed and the allocation is retried. See https://github.com/halide/Halide/issues/4093
If set to false, releases all unused device allocations back to the underlying device APIs. For finer-grained control, see specific methods in each device api runtime.
Note that if the flag is set to true, this call must succeed and return a value of halide_error_code_success (i.e., zero); if you replace the implementation of this call in the runtime, you must honor this contract.
|
extern |
Determines whether on device_free the memory is returned immediately to the device API, or placed on a free list for future use.
Override and switch based on the user_context for finer-grained control. By default just returns the value most recently set by the method above.
|
extern |
Register a callback to be informed when halide_reuse_device_allocations(false) is called, and all unused device allocations must be released.
The object passed should have global lifetime, and its next field will be clobbered.