Halide
HalideRuntime.h
Go to the documentation of this file.
1 #ifndef HALIDE_HALIDERUNTIME_H
2 #define HALIDE_HALIDERUNTIME_H
3 
4 #ifndef COMPILING_HALIDE_RUNTIME
5 #include <stddef.h>
6 #include <stdint.h>
7 #include <stdbool.h>
8 #else
9 #include "runtime_internal.h"
10 #endif
11 
12 #ifdef __cplusplus
13 // Forward declare type to allow naming typed handles.
14 // See Type.h for documentation.
15 template<typename T> struct halide_handle_traits;
16 #endif
17 
18 #ifdef __cplusplus
19 extern "C" {
20 #endif
21 
22 // Note that you should not use "inline" along with HALIDE_ALWAYS_INLINE;
23 // it is not necessary, and may produce warnings for some build configurations.
24 #ifdef _MSC_VER
25 #define HALIDE_ALWAYS_INLINE __forceinline
26 #else
27 #define HALIDE_ALWAYS_INLINE __attribute__((always_inline)) inline
28 #endif
29 
30 /** \file
31  *
32  * This file declares the routines used by Halide internally in its
33  * runtime. On platforms that support weak linking, these can be
34  * replaced with user-defined versions by defining an extern "C"
35  * function with the same name and signature.
36  *
37  * When doing Just In Time (JIT) compilation, methods on the Func being
38  * compiled must be called instead. The corresponding methods are
39  * documented below.
40  *
41  * All of these functions take a "void *user_context" parameter as their
42  * first argument; if the Halide kernel that calls back to any of these
43  * functions has been compiled with the UserContext feature set on its Target,
44  * then the value of that pointer passed from the code that calls the
45  * Halide kernel is piped through to the function.
46  *
47  * Some of these are also useful to call when using the default
48  * implementation. E.g. halide_shutdown_thread_pool.
49  *
50  * Note that even on platforms with weak linking, some linker setups
51  * may not respect the override you provide. E.g. if the override is
52  * in a shared library and the halide object files are linked directly
53  * into the output, the builtin versions of the runtime functions will
54  * be called. See your linker documentation for more details. On
55  * Linux, LD_DYNAMIC_WEAK=1 may help.
56  *
57  */
58 
59 // Forward-declare to suppress warnings if compiling as C.
60 struct halide_buffer_t;
61 struct buffer_t;
62 
63 /** Print a message to stderr. Main use is to support tracing
64  * functionality, print, and print_when calls. Also called by the default
65  * halide_error. This function can be replaced in JITed code by using
66  * halide_custom_print and providing an implementation of halide_print
67  * in AOT code. See Func::set_custom_print.
68  */
69 // @{
70 extern void halide_print(void *user_context, const char *);
71 extern void halide_default_print(void *user_context, const char *);
72 typedef void (*halide_print_t)(void *, const char *);
74 // @}
75 
76 /** Halide calls this function on runtime errors (for example bounds
77  * checking failures). This function can be replaced in JITed code by
78  * using Func::set_error_handler, or in AOT code by calling
79  * halide_set_error_handler. In AOT code on platforms that support
80  * weak linking (i.e. not Windows), you can also override it by simply
81  * defining your own halide_error.
82  */
83 // @{
84 extern void halide_error(void *user_context, const char *);
85 extern void halide_default_error(void *user_context, const char *);
86 typedef void (*halide_error_handler_t)(void *, const char *);
88 // @}
89 
90 /** Cross-platform mutex. These are allocated statically inside the
91  * runtime, hence the fixed size. They must be initialized with
92  * zero. The first time halide_mutex_lock is called, the lock must be
93  * initialized in a thread safe manner. This incurs a small overhead
94  * for a once mechanism, but makes the lock reliably easy to set up and
95  * use without depending on e.g. C++ constructor logic.
96  */
97 struct halide_mutex {
99 };
100 
101 /** A basic set of mutex and condition variable functions, which call
102  * platform specific code for mutual exclusion. Equivalent to posix
103  * calls. Mutexes should initially be set to zero'd memory. Any
104  * resources required are created on first lock. Calling destroy
105  * re-zeros the memory.
106  */
107 //@{
108 extern void halide_mutex_lock(struct halide_mutex *mutex);
109 extern void halide_mutex_unlock(struct halide_mutex *mutex);
110 extern void halide_mutex_destroy(struct halide_mutex *mutex);
111 //@}
112 
113 /** Define halide_do_par_for to replace the default thread pool
114  * implementation. halide_shutdown_thread_pool can also be called to
115  * release resources used by the default thread pool on platforms
116  * where it makes sense. (E.g. On Mac OS, Grand Central Dispatch is
117  * used so %Halide does not own the threads backing the pool and they
118  * cannot be released.) See Func::set_custom_do_task and
119  * Func::set_custom_do_par_for. Should return zero if all the jobs
120  * return zero, or an arbitrarily chosen return value from one of the
121  * jobs otherwise.
122  */
123 //@{
124 typedef int (*halide_task_t)(void *user_context, int task_number, uint8_t *closure);
125 extern int halide_do_par_for(void *user_context,
126  halide_task_t task,
127  int min, int size, uint8_t *closure);
128 extern void halide_shutdown_thread_pool();
129 //@}
130 
131 /** Set a custom method for performing a parallel for loop. Returns
132  * the old do_par_for handler. */
133 typedef int (*halide_do_par_for_t)(void *, halide_task_t, int, int, uint8_t*);
135 
136 /** If you use the default do_par_for, you can still set a custom
137  * handler to perform each individual task. Returns the old handler. */
138 //@{
139 typedef int (*halide_do_task_t)(void *, halide_task_t, int, uint8_t *);
141 extern int halide_do_task(void *user_context, halide_task_t f, int idx,
142  uint8_t *closure);
143 //@}
144 
145 /** The default versions of do_task and do_par_for. Can be convenient
146  * to call from overrides in certain circumstances. */
147 // @{
148 extern int halide_default_do_par_for(void *user_context,
149  halide_task_t task,
150  int min, int size, uint8_t *closure);
151 extern int halide_default_do_task(void *user_context, halide_task_t f, int idx,
152  uint8_t *closure);
153 // @}
154 
155 struct halide_thread;
156 
157 /** Spawn a thread. Returns a handle to the thread for the purposes of
158  * joining it. The thread must be joined in order to clean up any
159  * resources associated with it. */
160 extern struct halide_thread *halide_spawn_thread(void (*f)(void *), void *closure);
161 
162 /** Join a thread. */
163 extern void halide_join_thread(struct halide_thread *);
164 
165 /** Set the number of threads used by Halide's thread pool. Returns
166  * the old number.
167  *
168  * n < 0 : error condition
169  * n == 0 : use a reasonable system default (typically, number of cpus online).
170  * n == 1 : use exactly one thread; this will always enforce serial execution
171  * n > 1 : use a pool of exactly n threads.
172  *
173  * Note that the default iOS and OSX behavior will treat n > 1 like n == 0;
174  * that is, any positive value other than 1 will use a system-determined number
175  * of threads.
176  *
177  * (Note that this is only guaranteed when using the default implementations
178  * of halide_do_par_for(); custom implementations may completely ignore values
179  * passed to halide_set_num_threads().)
180  */
181 extern int halide_set_num_threads(int n);
182 
183 /** Halide calls these functions to allocate and free memory. To
184  * replace in AOT code, use the halide_set_custom_malloc and
185  * halide_set_custom_free, or (on platforms that support weak
186  * linking), simply define these functions yourself. In JIT-compiled
187  * code use Func::set_custom_allocator.
188  *
189  * If you override them, and find yourself wanting to call the default
190  * implementation from within your override, use
191  * halide_default_malloc/free.
192  *
193  * Note that halide_malloc must return a pointer aligned to the
194  * maximum meaningful alignment for the platform for the purpose of
195  * vector loads and stores. The default implementation uses 32-byte
196  * alignment, which is safe for arm and x86. Additionally, it must be
197  * safe to read at least 8 bytes before the start and beyond the
198  * end.
199  */
200 //@{
201 extern void *halide_malloc(void *user_context, size_t x);
202 extern void halide_free(void *user_context, void *ptr);
203 extern void *halide_default_malloc(void *user_context, size_t x);
204 extern void halide_default_free(void *user_context, void *ptr);
205 typedef void *(*halide_malloc_t)(void *, size_t);
206 typedef void (*halide_free_t)(void *, void *);
209 //@}
210 
211 /** Halide calls these functions to interact with the underlying
212  * system runtime functions. To replace in AOT code on platforms that
213  * support weak linking, define these functions yourself, or use
214  * the halide_set_custom_load_library() and halide_set_custom_get_library_symbol()
215  * functions. In JIT-compiled code, use JITSharedRuntime::set_default_handlers().
216  *
217  * halide_load_library and halide_get_library_symbol are equivalent to
218  * dlopen and dlsym. halide_get_symbol(sym) is equivalent to
219  * dlsym(RTLD_DEFAULT, sym).
220  */
221 //@{
222 extern void *halide_get_symbol(const char *name);
223 extern void *halide_load_library(const char *name);
224 extern void *halide_get_library_symbol(void *lib, const char *name);
225 extern void *halide_default_get_symbol(const char *name);
226 extern void *halide_default_load_library(const char *name);
227 extern void *halide_default_get_library_symbol(void *lib, const char *name);
228 typedef void *(*halide_get_symbol_t)(const char *name);
229 typedef void *(*halide_load_library_t)(const char *name);
230 typedef void *(*halide_get_library_symbol_t)(void *lib, const char *name);
234 //@}
235 
236 /** Called when debug_to_file is used inside %Halide code. See
237  * Func::debug_to_file for how this is called
238  *
239  * Cannot be replaced in JITted code at present.
240  */
241 extern int32_t halide_debug_to_file(void *user_context, const char *filename,
242  int32_t type_code,
243  struct halide_buffer_t *buf);
244 
245 /** Types in the halide type system. They can be ints, unsigned ints,
246  * or floats (of various bit-widths), or a handle (which is always 64-bits).
247  * Note that the int/uint/float values do not imply a specific bit width
248  * (the bit width is expected to be encoded in a separate value).
249  */
250 typedef enum halide_type_code_t
251 #if __cplusplus >= 201103L
252 : uint8_t
253 #endif
254 {
255  halide_type_int = 0, //!< signed integers
256  halide_type_uint = 1, //!< unsigned integers
257  halide_type_float = 2, //!< floating point numbers
258  halide_type_handle = 3 //!< opaque pointer type (void *)
260 
261 // Note that while __attribute__ can go before or after the declaration,
262 // __declspec apparently is only allowed before.
263 #ifndef HALIDE_ATTRIBUTE_ALIGN
264  #ifdef _MSC_VER
265  #define HALIDE_ATTRIBUTE_ALIGN(x) __declspec(align(x))
266  #else
267  #define HALIDE_ATTRIBUTE_ALIGN(x) __attribute__((aligned(x)))
268  #endif
269 #endif
270 
271 /** A runtime tag for a type in the halide type system. Can be ints,
272  * unsigned ints, or floats of various bit-widths (the 'bits'
273  * field). Can also be vectors of the same (by setting the 'lanes'
274  * field to something larger than one). This struct should be
275  * exactly 32-bits in size. */
277  /** The basic type code: signed integer, unsigned integer, or floating point. */
278 #if __cplusplus >= 201103L
279  HALIDE_ATTRIBUTE_ALIGN(1) halide_type_code_t code; // halide_type_code_t
280 #else
281  HALIDE_ATTRIBUTE_ALIGN(1) uint8_t code; // halide_type_code_t
282 #endif
283 
284  /** The number of bits of precision of a single scalar value of this type. */
286 
287  /** How many elements in a vector. This is 1 for scalar types. */
289 
290 #ifdef __cplusplus
291  /** Construct a runtime representation of a Halide type from:
292  * code: The fundamental type from an enum.
293  * bits: The bit size of one element.
294  * lanes: The number of vector elements in the type. */
296  : code(code), bits(bits), lanes(lanes) {
297  }
298 
299  /** Default constructor is required e.g. to declare halide_trace_event
300  * instances. */
301  HALIDE_ALWAYS_INLINE halide_type_t() : code((halide_type_code_t)0), bits(0), lanes(0) {}
302 
303  /** Compare two types for equality. */
304  HALIDE_ALWAYS_INLINE bool operator==(const halide_type_t &other) const {
305  return (code == other.code &&
306  bits == other.bits &&
307  lanes == other.lanes);
308  }
309 
310  HALIDE_ALWAYS_INLINE bool operator!=(const halide_type_t &other) const {
311  return !(*this == other);
312  }
313 
314  /** Size in bytes of a single scalar element of this type, even if lanes is not 1. */
315  HALIDE_ALWAYS_INLINE int bytes() const { return (bits + 7) / 8; }
316 #endif
317 };
318 
329 
331  /** The name of the Func or Pipeline that this event refers to */
332  const char *func;
333 
334  /** If the event type is a load or a store, this points to the
335  * value being loaded or stored. Use the type field to safely cast
336  * this to a concrete pointer type and retrieve it. For other
337  * events this is null. */
338  void *value;
339 
340  /** For loads and stores, an array which contains the location
341  * being accessed. For vector loads or stores it is an array of
342  * vectors of coordinates (the vector dimension is innermost).
343  *
344  * For realization or production-related events, this will contain
345  * the mins and extents of the region being accessed, in the order
346  * min0, extent0, min1, extent1, ...
347  *
348  * For pipeline-related events, this will be null.
349  */
351 
352  /** If the event type is a load or a store, this is the type of
353  * the data. Otherwise, the value is meaningless. */
354  struct halide_type_t type;
355 
356  /** The type of event */
358 
359  /** The ID of the parent event (see below for an explanation of
360  * event ancestry). */
362 
363  /** If this was a load or store of a Tuple-valued Func, this is
364  * which tuple element was accessed. */
366 
367  /** The length of the coordinates array */
369 
370 #ifdef __cplusplus
371  // If we don't explicitly mark the default ctor as inline,
372  // certain build configurations can fail (notably iOS)
374 #endif
375 };
376 
377 /** Called when Funcs are marked as trace_load, trace_store, or
378  * trace_realization. See Func::set_custom_trace. The default
379  * implementation either prints events via halide_print, or if
380  * HL_TRACE_FILE is defined, dumps the trace to that file in a
381  * sequence of trace packets. The header for a trace packet is defined
382  * below. If the trace is going to be large, you may want to make the
383  * file a named pipe, and then read from that pipe into gzip.
384  *
385  * halide_trace returns a unique ID which will be passed to future
386  * events that "belong" to the earlier event as the parent id. The
387  * ownership hierarchy looks like:
388  *
389  * begin_pipeline
390  * +--begin_realization
391  * | +--produce
392  * | | +--load/store
393  * | | +--end_produce
394  * | +--consume
395  * | | +--load
396  * | | +--end_consume
397  * | +--end_realization
398  * +--end_pipeline
399  *
400  * Threading means that ownership cannot be inferred from the ordering
401  * of events. There can be many active realizations of a given
402  * function, or many active productions for a single
403  * realization. Within a single production, the ordering of events is
404  * meaningful.
405  */
406 // @{
407 extern int32_t halide_trace(void *user_context, const struct halide_trace_event_t *event);
408 extern int32_t halide_default_trace(void *user_context, const struct halide_trace_event_t *event);
409 typedef int32_t (*halide_trace_t)(void *user_context, const struct halide_trace_event_t *);
411 // @}
412 
413 /** The header of a packet in a binary trace. All fields are 32-bit. */
415  /** The total size of this packet in bytes. Always a multiple of
416  * four. Equivalently, the number of bytes until the next
417  * packet. */
419 
420  /** The id of this packet (for the purpose of parent_id). */
422 
423  /** The remaining fields are equivalent to those in halide_trace_event_t */
424  // @{
425  struct halide_type_t type;
430  // @}
431 
432  #ifdef __cplusplus
433  // If we don't explicitly mark the default ctor as inline,
434  // certain build configurations can fail (notably iOS)
436 
437  /** Get the coordinates array, assuming this packet is laid out in
438  * memory as it was written. The coordinates array comes
439  * immediately after the packet header. */
440  HALIDE_ALWAYS_INLINE const int *coordinates() const {
441  return (const int *)(this + 1);
442  }
443 
444  HALIDE_ALWAYS_INLINE int *coordinates() {
445  return (int *)(this + 1);
446  }
447 
448  /** Get the value, assuming this packet is laid out in memory as
449  * it was written. The value comes immediately after the coordinates
450  * array. */
451  HALIDE_ALWAYS_INLINE const void *value() const {
452  return (const void *)(coordinates() + dimensions);
453  }
454 
455  HALIDE_ALWAYS_INLINE void *value() {
456  return (void *)(coordinates() + dimensions);
457  }
458 
459  /** Get the func name, assuming this packet is laid out in memory
460  * as it was written. It comes after the value. */
461  HALIDE_ALWAYS_INLINE const char *func() const {
462  return (const char *)value() + type.lanes * type.bytes();
463  }
464 
465  HALIDE_ALWAYS_INLINE char *func() {
466  return (char *)value() + type.lanes * type.bytes();
467  }
468  #endif
469 };
470 
471 
472 
473 /** Set the file descriptor that Halide should write binary trace
474  * events to. If called with 0 as the argument, Halide outputs trace
475  * information to stdout in a human-readable format. If never called,
476  * Halide checks for the existence of an environment variable called
477  * HL_TRACE_FILE and opens that file. If HL_TRACE_FILE is not defined,
478  * it outputs trace information to stdout in a human-readable
479  * format. */
480 extern void halide_set_trace_file(int fd);
481 
482 /** Halide calls this to retrieve the file descriptor to write binary
483  * trace events to. The default implementation returns the value set
484  * by halide_set_trace_file. Implement it yourself if you wish to use
485  * a custom file descriptor per user_context. Return zero from your
486  * implementation to tell Halide to print human-readable trace
487  * information to stdout. */
488 extern int halide_get_trace_file(void *user_context);
489 
490 /** If tracing is writing to a file, this call closes that file
491  * (flushing the trace). Returns zero on success. */
492 extern int halide_shutdown_trace();
493 
494 /** All Halide GPU or device backend implementations provide an
495  * interface to be used with halide_device_malloc, etc. This is
496  * accessed via the functions below.
497  */
498 
499 /** An opaque struct containing per-GPU API implementations of the
500  * device functions. */
502 
503 /** Each GPU API provides a halide_device_interface_t struct pointing
504  * to the code that manages device allocations. You can access these
505  * functions directly from the struct member function pointers, or by
506  * calling the functions declared below. Note that the global
507  * functions are not available when using Halide as a JIT compiler.
508  * If you are using raw halide_buffer_t in that context you must use
509  * the function pointers in the device_interface struct.
510  *
511  * The function pointers below are currently the same for every GPU
512  * API; only the impl field varies. These top-level functions do the
513  * bookkeeping that is common across all GPU APIs, and then dispatch
514  * to more API-specific functions via another set of function pointers
515  * hidden inside the impl field.
516  */
518  int (*device_malloc)(void *user_context, struct halide_buffer_t *buf,
519  const struct halide_device_interface_t *device_interface);
520  int (*device_free)(void *user_context, struct halide_buffer_t *buf);
521  int (*device_sync)(void *user_context, struct halide_buffer_t *buf);
522  void (*device_release)(void *user_context,
523  const struct halide_device_interface_t *device_interface);
525  int (*copy_to_device)(void *user_context, struct halide_buffer_t *buf,
526  const struct halide_device_interface_t *device_interface);
528  const struct halide_device_interface_t *device_interface);
530  int (*buffer_copy)(void *user_context, struct halide_buffer_t *src,
531  const struct halide_device_interface_t *dst_device_interface, struct halide_buffer_t *dst);
532  int (*device_crop)(void *user_context, const struct halide_buffer_t *src,
533  struct halide_buffer_t *dst);
534  int (*device_release_crop)(void *user_context, struct halide_buffer_t *buf);
535  int (*wrap_native)(void *user_context, struct halide_buffer_t *buf, uint64_t handle,
536  const struct halide_device_interface_t *device_interface);
539 };
540 
541 /** Release all data associated with the given device interface, in
542  * particular all resources (memory, texture, context handles)
543  * allocated by Halide. Must be called explicitly when using AOT
544  * compilation. */
545 extern void halide_device_release(void *user_context,
546  const struct halide_device_interface_t *device_interface);
547 
548 /** Copy image data from device memory to host memory. This must be called
549  * explicitly to copy back the results of a GPU-based filter. */
550 extern int halide_copy_to_host(void *user_context, struct halide_buffer_t *buf);
551 
552 /** Copy image data from host memory to device memory. This should not
553  * be called directly; Halide handles copying to the device
554  * automatically. If interface is NULL and the buffer has a non-zero dev
555  * field, the device associated with the dev handle will be
556  * used. Otherwise if the dev field is 0 and interface is NULL, an
557  * error is returned. */
558 extern int halide_copy_to_device(void *user_context, struct halide_buffer_t *buf,
559  const struct halide_device_interface_t *device_interface);
560 
561 /** Copy data from one buffer to another. The buffers may have
562  * different shapes and sizes, but the destination buffer's shape must
563  * be contained within the source buffer's shape. That is, for each
564  * dimension, the min on the destination buffer must be greater than
565  * or equal to the min on the source buffer, and min+extent on the
566  * destination buffer must be less than or equal to min+extent on the
567  * source buffer. The source data is pulled from either device or
568  * host memory on the source, depending on the dirty flags. host is
569  * preferred if both are valid. The dst_device_interface parameter
570  * controls the destination memory space. NULL means host memory. */
571 extern int halide_buffer_copy(void *user_context, struct halide_buffer_t *src,
572  const struct halide_device_interface_t *dst_device_interface,
573  struct halide_buffer_t *dst);
574 
575 /** Give the destination buffer a device allocation which is an alias
576  * for the same coordinate range in the source buffer. Modifies the
577  * device, device_interface, and the device_dirty flag only. Only
578  * supported by some device APIs (others will return
579  * halide_error_code_device_crop_unsupported). Call
580  * halide_device_release_crop instead of halide_device_free to clean
581  * up resources associated with the cropped view. Do not free the
582  * device allocation on the source buffer while the destination buffer
583  * still lives. Note that the two buffers do not share dirty flags, so
584  * care must be taken to update them together as needed. Note also
585  * that device interfaces which support cropping may still not support
586  * cropping a crop. Instead, create a new crop of the parent
587  * buffer. */
588 extern int halide_device_crop(void *user_context,
589  const struct halide_buffer_t *src,
590  struct halide_buffer_t *dst);
591 
592 /** Release any resources associated with a cropped view of another
593  * buffer. */
594 extern int halide_device_release_crop(void *user_context,
595  struct halide_buffer_t *buf);
596 
597 /** Wait for current GPU operations to complete. Calling this explicitly
598  * should rarely be necessary, except maybe for profiling. */
599 extern int halide_device_sync(void *user_context, struct halide_buffer_t *buf);
600 
601 /** Allocate device memory to back a halide_buffer_t. */
602 extern int halide_device_malloc(void *user_context, struct halide_buffer_t *buf,
603  const struct halide_device_interface_t *device_interface);
604 
605 /** Free device memory. */
606 extern int halide_device_free(void *user_context, struct halide_buffer_t *buf);
607 
608 /** Wrap or detach a native device handle, setting the device field
609  * and device_interface field as appropriate for the given GPU
610  * API. The meaning of the opaque handle is specific to the device
611  * interface, so if you know the device interface in use, call the
612  * more specific functions in the runtime headers for your specific
613  * device API instead (e.g. HalideRuntimeCuda.h). */
614 // @{
615 extern int halide_device_wrap_native(void *user_context,
616  struct halide_buffer_t *buf,
617  uint64_t handle,
618  const struct halide_device_interface_t *device_interface);
619 extern int halide_device_detach_native(void *user_context, struct halide_buffer_t *buf);
620 // @}
621 
622 /** Versions of the above functions that accept legacy buffer_t structs. */
623 // @{
624 extern int halide_copy_to_host_legacy(void *user_context, struct buffer_t *buf);
625 extern int halide_copy_to_device_legacy(void *user_context, struct buffer_t *buf,
626  const struct halide_device_interface_t *device_interface);
627 extern int halide_device_sync_legacy(void *user_context, struct buffer_t *buf);
628 extern int halide_device_malloc_legacy(void *user_context, struct buffer_t *buf,
629  const struct halide_device_interface_t *device_interface);
630 extern int halide_device_free_legacy(void *user_context, struct buffer_t *buf);
631 // @}
632 
633 /** Selects which gpu device to use. 0 is usually the display
634  * device. If never called, Halide uses the environment variable
635  * HL_GPU_DEVICE. If that variable is unset, Halide uses the last
636  * device. Set this to -1 to use the last device. */
637 extern void halide_set_gpu_device(int n);
638 
639 /** Halide calls this to get the desired halide gpu device
640  * setting. Implement this yourself to use a different gpu device per
641  * user_context. The default implementation returns the value set by
642  * halide_set_gpu_device, or the environment variable
643  * HL_GPU_DEVICE. */
644 extern int halide_get_gpu_device(void *user_context);
645 
646 /** Set the soft maximum amount of memory, in bytes, that the LRU
647  * cache will use to memoize Func results. This is not a strict
648  * maximum in that concurrency and simultaneous use of memoized
649  * results larger than the cache size can both cause it to
650  * temporarily be larger than the size specified here.
651  */
653 
654 /** Given a cache key for a memoized result, currently constructed
655  * from the Func name and top-level Func name plus the arguments of
656  * the computation, determine if the result is in the cache and
657  * return it if so. (The internals of the cache key should be
658  * considered opaque by this function.) If this routine returns true,
659  * it is a cache miss. Otherwise, it will return false and the
660  * buffers passed in will be filled, via copying, with memoized
661  * data. The last argument is a list of halide_buffer_t pointers which
662  * represents the outputs of the memoized Func. If the Func does not
663  * return a Tuple, there will only be one halide_buffer_t in the list. The
664  * tuple_count parameter determines the length of the list.
665  *
666  * The return values are:
667  * -1: Signals an error.
668  * 0: Success and cache hit.
669  * 1: Success and cache miss.
670  */
671 extern int halide_memoization_cache_lookup(void *user_context, const uint8_t *cache_key, int32_t size,
672  struct halide_buffer_t *realized_bounds,
673  int32_t tuple_count, struct halide_buffer_t **tuple_buffers);
674 
675 /** Given a cache key for a memoized result, currently constructed
676  * from the Func name and top-level Func name plus the arguments of
677  * the computation, store the result in the cache for future access by
678  * halide_memoization_cache_lookup. (The internals of the cache key
679  * should be considered opaque by this function.) Data is copied out
680  * from the inputs and inputs are unmodified. The last argument is a
681  * list of halide_buffer_t pointers which represents the outputs of the
682  * memoized Func. If the Func does not return a Tuple, there will
683  * only be one halide_buffer_t in the list. The tuple_count parameter
684  * determines the length of the list.
685  *
686  * If there is a memory allocation failure, the store does not store
687  * the data into the cache.
688  */
689 extern int halide_memoization_cache_store(void *user_context, const uint8_t *cache_key, int32_t size,
690  struct halide_buffer_t *realized_bounds,
691  int32_t tuple_count,
692  struct halide_buffer_t **tuple_buffers);
693 
694 /** If halide_memoization_cache_lookup succeeds,
695  * halide_memoization_cache_release must be called to signal the
696  * storage is no longer being used by the caller. It will be passed
697  * the host pointer of one of the buffers returned by
698  * halide_memoization_cache_lookup. That is
699  * halide_memoization_cache_release will be called multiple times for
700  * the case where halide_memoization_cache_lookup is handling multiple
701  * buffers. (This corresponds to memoizing a Tuple in Halide.) Note
702  * that the host pointer must be sufficient to get to all information
703  * the release operation needs. The default Halide cache implementation
704  * accomplishes this by storing extra data before the start of the user
705  * modifiable host storage.
706  *
707  * This call is like free and does not have a failure return.
708  */
709 extern void halide_memoization_cache_release(void *user_context, void *host);
710 
711 /** Free all memory and resources associated with the memoization cache.
712  * Must be called at a time when no other threads are accessing the cache.
713  */
715 
716 /** Create a unique file with a name of the form prefixXXXXXsuffix in an arbitrary
717  * (but writable) directory; this is typically $TMP or /tmp, but the specific
718  * location is not guaranteed. (Note that the exact form of the file name
719  * may vary; in particular, the suffix may be ignored on non-Posix systems.)
720  * The file is created (but not opened), thus this can be called from
721  * different threads (or processes, e.g. when building with parallel make)
722  * without risking collision. Note that the caller is always responsible
723  * for deleting this file. Returns nonzero value if an error occurs.
724  */
725 extern int halide_create_temp_file(void *user_context,
726  const char *prefix, const char *suffix,
727  char *path_buf, size_t path_buf_size);
728 
729 /** Annotate that a given range of memory has been initialized;
730  * only used when Target::MSAN is enabled.
731  *
732  * The default implementation uses the LLVM-provided AnnotateMemoryIsInitialized() function.
733  */
734 extern void halide_msan_annotate_memory_is_initialized(void *user_context, const void *ptr, uint64_t len);
735 
736 /** Mark the data pointed to by the buffer_t as initialized (but *not* the buffer_t itself),
737  * using halide_msan_annotate_memory_is_initialized() for marking.
738  *
739  * The default implementation takes pains to only mark the active memory ranges
740  * (skipping padding), and sorting into ranges to always mark the smallest number of
741  * ranges, in monotonically increasing memory order.
742  *
743  * Most client code should never need to replace the default implementation.
744  */
745 extern void halide_msan_annotate_buffer_is_initialized(void *user_context, struct halide_buffer_t *buffer);
746 extern void halide_msan_annotate_buffer_is_initialized_as_destructor(void *user_context, void *buffer);
747 
748 /** The error codes that may be returned by a Halide pipeline. */
750  /** There was no error. This is the value returned by Halide on success. */
752 
753  /** An uncategorized error occurred. Refer to the string passed to halide_error. */
755 
756  /** A Func was given an explicit bound via Func::bound, but this
757  * was not large enough to encompass the region that is used of
758  * the Func by the rest of the pipeline. */
760 
761  /** The elem_size field of a halide_buffer_t does not match the size in
762  * bytes of the type of that ImageParam. Probable type mismatch. */
764 
765  /** A pipeline would access memory outside of the halide_buffer_t passed
766  * in. */
768 
769  /** A halide_buffer_t was given that spans more than 2GB of memory. */
771 
772  /** A halide_buffer_t was given with extents that multiply to a number
773  * greater than 2^31-1 */
775 
776  /** Applying explicit constraints on the size of an input or
777  * output buffer shrank the size of that buffer below what will be
778  * accessed by the pipeline. */
780 
781  /** A constraint on a size or stride of an input or output buffer
782  * was not met by the halide_buffer_t passed in. */
784 
785  /** A scalar parameter passed in was smaller than its minimum
786  * declared value. */
788 
789  /** A scalar parameter passed in was greater than its maximum
790  * declared value. */
792 
793  /** A call to halide_malloc returned NULL. */
795 
796  /** A halide_buffer_t pointer passed in was NULL. */
798 
799  /** debug_to_file failed to open or write to the specified
800  * file. */
802 
803  /** The Halide runtime encountered an error while trying to copy
804  * from device to host. Turn on -debug in your target string to
805  * see more details. */
807 
808  /** The Halide runtime encountered an error while trying to copy
809  * from host to device. Turn on -debug in your target string to
810  * see more details. */
812 
813  /** The Halide runtime encountered an error while trying to
814  * allocate memory on device. Turn on -debug in your target string
815  * to see more details. */
817 
818  /** The Halide runtime encountered an error while trying to
819  * synchronize with a device. Turn on -debug in your target string
820  * to see more details. */
822 
823  /** The Halide runtime encountered an error while trying to free a
824  * device allocation. Turn on -debug in your target string to see
825  * more details. */
827 
828  /** Buffer has a non-zero device but no device interface, which
829  * violates a Halide invariant. */
831 
832  /** An error occurred when attempting to initialize the Matlab
833  * runtime. */
835 
836  /** The type of an mxArray did not match the expected type. */
838 
839  /** There is a bug in the Halide compiler. */
841 
842  /** The Halide runtime encountered an error while trying to launch
843  * a GPU kernel. Turn on -debug in your target string to see more
844  * details. */
846 
847  /** The Halide runtime encountered a host pointer that violated
848  * the alignment set for it by way of a call to
849  * set_host_alignment */
851 
852  /** A fold_storage directive was used on a dimension that is not
853  * accessed in a monotonically increasing or decreasing fashion. */
855 
856  /** A fold_storage directive was used with a fold factor that was
857  * too small to store all the values of a producer needed by the
858  * consumer. */
860 
861  /** User-specified require() expression was not satisfied. */
863 
864  /** At least one of the buffer's extents is negative. */
866 
867  /** A compiled pipeline was passed the old deprecated buffer_t
868  * struct, and it could not be upgraded to a halide_buffer_t. */
870 
871  /** A compiled pipeline was passed the old deprecated buffer_t
872  * struct in bounds inference mode, but the returned information
873  * can't be expressed in the old buffer_t. */
875 
876  /** A specialize_fail() schedule branch was selected at runtime. */
878 
879  /** The Halide runtime encountered an error while trying to wrap a
880  * native device handle. Turn on -debug in your target string to
881  * see more details. */
883 
884  /** The Halide runtime encountered an error while trying to detach
885  * a native device handle. Turn on -debug in your target string
886  * to see more details. */
888 
889  /** The host field on an input or output was null, the device
890  * field was not zero, and the pipeline tries to use the buffer on
891  * the host. You may be passing a GPU-only buffer to a pipeline
892  * which is scheduled to use it on the CPU. */
894 
895  /** A folded buffer was passed to an extern stage, but the region
896  * touched wraps around the fold boundary. */
898 
899  /** Buffer has a non-null device_interface but device is 0, which
900  * violates a Halide invariant. */
902 
903  /** Buffer has both host and device dirty bits set, which violates
904  * a Halide invariant. */
906 
907  /** The halide_buffer_t * passed to a halide runtime routine is
908  * nullptr and this is not allowed. */
910 
911  /** The Halide runtime encountered an error while trying to copy
912  * from one buffer to another. Turn on -debug in your target
913  * string to see more details. */
915 
916  /** Attempted to make cropped alias of a buffer with a device
917  * field, but the device_interface does not support cropping. */
919 
920  /** Cropping a buffer failed for some other reason. Turn on -debug
921  * in your target string. */
923 
924  /** An operation on a buffer required an allocation on a
925  * particular device interface, but a device allocation already
926  * existed on a different device interface. Free the old one
927  * first. */
929 };
930 
931 /** Halide calls the functions below on various error conditions. The
932  * default implementations construct an error message, call
933  * halide_error, then return the matching error code above. On
934  * platforms that support weak linking, you can override these to
935  * catch the errors individually. */
936 
937 /** A call into an extern stage for the purposes of bounds inference
938  * failed. Returns the error code given by the extern stage. */
939 extern int halide_error_bounds_inference_call_failed(void *user_context, const char *extern_stage_name, int result);
940 
941 /** A call to an extern stage failed. Returns the error code given by
942  * the extern stage. */
943 extern int halide_error_extern_stage_failed(void *user_context, const char *extern_stage_name, int result);
944 
945 /** Various other error conditions. See the enum above for a
946  * description of each. */
947 // @{
948 extern int halide_error_explicit_bounds_too_small(void *user_context, const char *func_name, const char *var_name,
949  int min_bound, int max_bound, int min_required, int max_required);
950 extern int halide_error_bad_type(void *user_context, const char *func_name,
951  uint8_t code_given, uint8_t correct_code,
952  uint8_t bits_given, uint8_t correct_bits,
953  uint16_t lanes_given, uint16_t correct_lanes);
954 extern int halide_error_access_out_of_bounds(void *user_context, const char *func_name,
955  int dimension, int min_touched, int max_touched,
956  int min_valid, int max_valid);
957 extern int halide_error_buffer_allocation_too_large(void *user_context, const char *buffer_name,
958  uint64_t allocation_size, uint64_t max_size);
959 extern int halide_error_buffer_extents_negative(void *user_context, const char *buffer_name, int dimension, int extent);
960 extern int halide_error_buffer_extents_too_large(void *user_context, const char *buffer_name,
961  int64_t actual_size, int64_t max_size);
962 extern int halide_error_constraints_make_required_region_smaller(void *user_context, const char *buffer_name,
963  int dimension,
964  int constrained_min, int constrained_extent,
965  int required_min, int required_extent);
966 extern int halide_error_constraint_violated(void *user_context, const char *var, int val,
967  const char *constrained_var, int constrained_val);
968 extern int halide_error_param_too_small_i64(void *user_context, const char *param_name,
969  int64_t val, int64_t min_val);
970 extern int halide_error_param_too_small_u64(void *user_context, const char *param_name,
971  uint64_t val, uint64_t min_val);
972 extern int halide_error_param_too_small_f64(void *user_context, const char *param_name,
973  double val, double min_val);
974 extern int halide_error_param_too_large_i64(void *user_context, const char *param_name,
975  int64_t val, int64_t max_val);
976 extern int halide_error_param_too_large_u64(void *user_context, const char *param_name,
977  uint64_t val, uint64_t max_val);
978 extern int halide_error_param_too_large_f64(void *user_context, const char *param_name,
979  double val, double max_val);
980 extern int halide_error_out_of_memory(void *user_context);
981 extern int halide_error_buffer_argument_is_null(void *user_context, const char *buffer_name);
982 extern int halide_error_debug_to_file_failed(void *user_context, const char *func,
983  const char *filename, int error_code);
984 extern int halide_error_unaligned_host_ptr(void *user_context, const char *func_name, int alignment);
985 extern int halide_error_host_is_null(void *user_context, const char *func_name);
986 extern int halide_error_failed_to_upgrade_buffer_t(void *user_context,
987  const char *input_name,
988  const char *reason);
989 extern int halide_error_failed_to_downgrade_buffer_t(void *user_context,
990  const char *input_name,
991  const char *reason);
992 extern int halide_error_bad_fold(void *user_context, const char *func_name, const char *var_name,
993  const char *loop_name);
994 extern int halide_error_bad_extern_fold(void *user_context, const char *func_name,
995  int dim, int min, int extent, int valid_min, int fold_factor);
996 
997 extern int halide_error_fold_factor_too_small(void *user_context, const char *func_name, const char *var_name,
998  int fold_factor, const char *loop_name, int required_extent);
999 extern int halide_error_requirement_failed(void *user_context, const char *condition, const char *message);
1000 extern int halide_error_specialize_fail(void *user_context, const char *message);
1001 extern int halide_error_no_device_interface(void *user_context);
1002 extern int halide_error_device_interface_no_device(void *user_context);
1003 extern int halide_error_host_and_device_dirty(void *user_context);
1004 extern int halide_error_buffer_is_null(void *user_context, const char *routine);
1005 
1006 // @}
1007 
1008 /** Optional features a compilation Target can have.
1009  */
1011  halide_target_feature_jit = 0, ///< Generate code that will run immediately inside the calling process.
1012  halide_target_feature_debug = 1, ///< Turn on debug info and output for runtime code.
1013  halide_target_feature_no_asserts = 2, ///< Disable all runtime checks, for slightly tighter code.
1014  halide_target_feature_no_bounds_query = 3, ///< Disable the bounds querying functionality.
1015 
1016  halide_target_feature_sse41 = 4, ///< Use SSE 4.1 and earlier instructions. Only relevant on x86.
1017  halide_target_feature_avx = 5, ///< Use AVX 1 instructions. Only relevant on x86.
1018  halide_target_feature_avx2 = 6, ///< Use AVX 2 instructions. Only relevant on x86.
1019  halide_target_feature_fma = 7, ///< Enable x86 FMA instruction
1020  halide_target_feature_fma4 = 8, ///< Enable x86 (AMD) FMA4 instruction set
1021  halide_target_feature_f16c = 9, ///< Enable x86 16-bit float support
1022 
1023  halide_target_feature_armv7s = 10, ///< Generate code for ARMv7s. Only relevant for 32-bit ARM.
1024  halide_target_feature_no_neon = 11, ///< Avoid using NEON instructions. Only relevant for 32-bit ARM.
1025 
1026  halide_target_feature_vsx = 12, ///< Use VSX instructions. Only relevant on POWERPC.
1027  halide_target_feature_power_arch_2_07 = 13, ///< Use POWER ISA 2.07 new instructions. Only relevant on POWERPC.
1028 
1029  halide_target_feature_cuda = 14, ///< Enable the CUDA runtime. Defaults to compute capability 2.0 (Fermi)
1030  halide_target_feature_cuda_capability30 = 15, ///< Enable CUDA compute capability 3.0 (Kepler)
1031  halide_target_feature_cuda_capability32 = 16, ///< Enable CUDA compute capability 3.2 (Tegra K1)
1032  halide_target_feature_cuda_capability35 = 17, ///< Enable CUDA compute capability 3.5 (Kepler)
1033  halide_target_feature_cuda_capability50 = 18, ///< Enable CUDA compute capability 5.0 (Maxwell)
1034 
1035  halide_target_feature_opencl = 19, ///< Enable the OpenCL runtime.
1036  halide_target_feature_cl_doubles = 20, ///< Enable double support on OpenCL targets
1037 
1038  halide_target_feature_opengl = 21, ///< Enable the OpenGL runtime.
1039  halide_target_feature_openglcompute = 22, ///< Enable OpenGL Compute runtime.
1040 
1041  halide_target_feature_unused_23 = 23, ///< Unused. (Formerly: Enable the RenderScript runtime.)
1042 
1043  halide_target_feature_user_context = 24, ///< Generated code takes a user_context pointer as first argument
1044 
1045  halide_target_feature_matlab = 25, ///< Generate a mexFunction compatible with Matlab mex libraries. See tools/mex_halide.m.
1046 
1047  halide_target_feature_profile = 26, ///< Launch a sampling profiler alongside the Halide pipeline that monitors and reports the runtime used by each Func
1048  halide_target_feature_no_runtime = 27, ///< Do not include a copy of the Halide runtime in any generated object file or assembly
1049 
1050  halide_target_feature_metal = 28, ///< Enable the (Apple) Metal runtime.
1051  halide_target_feature_mingw = 29, ///< For Windows compile to MinGW toolset rather than Visual Studio
1052 
1053  halide_target_feature_c_plus_plus_mangling = 30, ///< Generate C++ mangled names for result function, et al
1054 
1055  halide_target_feature_large_buffers = 31, ///< Enable 64-bit buffer indexing to support buffers > 2GB. Ignored if bits != 64.
1056 
1057  halide_target_feature_hvx_64 = 32, ///< Enable HVX 64 byte mode.
1058  halide_target_feature_hvx_128 = 33, ///< Enable HVX 128 byte mode.
1059  halide_target_feature_hvx_v62 = 34, ///< Enable Hexagon v62 architecture.
1060  halide_target_feature_fuzz_float_stores = 35, ///< On every floating point store, set the last bit of the mantissa to zero. Pipelines for which the output is very different with this feature enabled may also produce very different output on different processors.
1061  halide_target_feature_soft_float_abi = 36, ///< Enable soft float ABI. This only enables the soft float ABI calling convention, which does not necessarily use soft floats.
1062  halide_target_feature_msan = 37, ///< Enable hooks for MSAN support.
1063  halide_target_feature_avx512 = 38, ///< Enable the base AVX512 subset supported by all AVX512 architectures. The specific feature sets are AVX-512F and AVX512-CD. See https://en.wikipedia.org/wiki/AVX-512 for a description of each AVX subset.
1064  halide_target_feature_avx512_knl = 39, ///< Enable the AVX512 features supported by Knight's Landing chips, such as the Xeon Phi x200. This includes the base AVX512 set, and also AVX512-CD and AVX512-ER.
1065  halide_target_feature_avx512_skylake = 40, ///< Enable the AVX512 features supported by Skylake Xeon server processors. This adds AVX512-VL, AVX512-BW, and AVX512-DQ to the base set. The main difference from the base AVX512 set is better support for small integer ops. Note that this does not include the Knight's Landing features. Note also that these features are not available on Skylake desktop and mobile processors.
1066  halide_target_feature_avx512_cannonlake = 41, ///< Enable the AVX512 features expected to be supported by future Cannonlake processors. This includes all of the Skylake features, plus AVX512-IFMA and AVX512-VBMI.
1068  halide_target_feature_trace_loads = 43, ///< Trace all loads done by the pipeline. Equivalent to calling Func::trace_loads on every non-inlined Func.
1069  halide_target_feature_trace_stores = 44, ///< Trace all stores done by the pipeline. Equivalent to calling Func::trace_stores on every non-inlined Func.
1070  halide_target_feature_trace_realizations = 45, ///< Trace all realizations done by the pipeline. Equivalent to calling Func::trace_realizations on every non-inlined Func.
1071  halide_target_feature_cuda_capability61 = 46, ///< Enable CUDA compute capability 6.1 (Pascal)
1072  halide_target_feature_hvx_v65 = 47, ///< Enable Hexagon v65 architecture.
1073  halide_target_feature_hvx_v66 = 48, ///< Enable Hexagon v66 architecture.
1074  halide_target_feature_end = 49, ///< A sentinel. Every target is considered to have this feature, and setting this feature does nothing.
1076 
1077 /** This function is called internally by Halide in some situations to determine
1078  * if the current execution environment can support the given set of
1079  * halide_target_feature_t flags. The implementation must do the following:
1080  *
1081  * -- If there are flags set in features that the function knows *cannot* be supported, return 0.
1082  * -- Otherwise, return 1.
1083  * -- Note that any flags set in features that the function doesn't know how to test should be ignored;
1084  * this implies that a return value of 1 means "not known to be bad" rather than "known to be good".
1085  *
1086  * In other words: a return value of 0 means "It is not safe to use code compiled with these features",
1087  * while a return value of 1 means "It is not obviously unsafe to use code compiled with these features".
1088  *
1089  * The default implementation simply calls halide_default_can_use_target_features.
1090  */
1091 // @{
1092 extern int halide_can_use_target_features(uint64_t features);
1095 // @}
1096 
1097 /**
1098  * This is the default implementation of halide_can_use_target_features; it is provided
1099  * for convenience of user code that may wish to extend halide_can_use_target_features
1100  * but continue providing existing support, e.g.
1101  *
1102  * int halide_can_use_target_features(uint64_t features) {
1103  * if (features & halide_target_somefeature) {
1104  * if (!can_use_somefeature()) {
1105  * return 0;
1106  * }
1107  * }
1108  * return halide_default_can_use_target_features(features);
1109  * }
1110  */
1112 
1113 
1114 typedef struct halide_dimension_t {
1115  int32_t min, extent, stride;
1116 
1117  // Per-dimension flags. None are defined yet (This is reserved for future use).
1119 
1120 #ifdef __cplusplus
1121  HALIDE_ALWAYS_INLINE halide_dimension_t() : min(0), extent(0), stride(0), flags(0) {}
1123  min(m), extent(e), stride(s), flags(f) {}
1124 
1125  HALIDE_ALWAYS_INLINE bool operator==(const halide_dimension_t &other) const {
1126  return (min == other.min) &&
1127  (extent == other.extent) &&
1128  (stride == other.stride) &&
1129  (flags == other.flags);
1130  }
1131 
1132  HALIDE_ALWAYS_INLINE bool operator!=(const halide_dimension_t &other) const {
1133  return !(*this == other);
1134  }
1135 #endif
1137 
1138 #ifdef __cplusplus
1139 } // extern "C"
1140 #endif
1141 
1144 
1145 /**
1146  * The raw representation of an image passed around by generated
1147  * Halide code. It includes some stuff to track whether the image is
1148  * not actually in main memory, but instead on a device (like a
1149  * GPU). For a more convenient C++ wrapper, use Halide::Buffer<T>. */
1150 typedef struct halide_buffer_t {
1151  /** A device-handle for e.g. GPU memory used to back this buffer. */
1153 
1154  /** The interface used to interpret the above handle. */
1156 
1157  /** A pointer to the start of the data in main memory. In terms of
1158  * the Halide coordinate system, this is the address of the min
1159  * coordinates (defined below). */
1161 
1162  /** flags with various meanings. */
1164 
1165  /** The type of each buffer element. */
1166  struct halide_type_t type;
1167 
1168  /** The dimensionality of the buffer. */
1170 
1171  /** The shape of the buffer. Halide does not own this array - you
1172  * must manage the memory for it yourself. */
1174 
1175  /** Pads the buffer up to a multiple of 8 bytes */
1176  void *padding;
1177 
1178 #ifdef __cplusplus
1179  /** Convenience methods for accessing the flags */
1180  // @{
1181  HALIDE_ALWAYS_INLINE bool get_flag(halide_buffer_flags flag) const {
1182  return (flags & flag) != 0;
1183  }
1184 
1185  HALIDE_ALWAYS_INLINE void set_flag(halide_buffer_flags flag, bool value) {
1186  if (value) {
1187  flags |= flag;
1188  } else {
1189  flags &= ~flag;
1190  }
1191  }
1192 
1193  HALIDE_ALWAYS_INLINE bool host_dirty() const {
1194  return get_flag(halide_buffer_flag_host_dirty);
1195  }
1196 
1197  HALIDE_ALWAYS_INLINE bool device_dirty() const {
1198  return get_flag(halide_buffer_flag_device_dirty);
1199  }
1200 
1201  HALIDE_ALWAYS_INLINE void set_host_dirty(bool v = true) {
1202  set_flag(halide_buffer_flag_host_dirty, v);
1203  }
1204 
1205  HALIDE_ALWAYS_INLINE void set_device_dirty(bool v = true) {
1206  set_flag(halide_buffer_flag_device_dirty, v);
1207  }
1208  // @}
1209 
1210  /** The total number of elements this buffer represents. Equal to
1211  * the product of the extents */
1212  HALIDE_ALWAYS_INLINE size_t number_of_elements() const {
1213  size_t s = 1;
1214  for (int i = 0; i < dimensions; i++) {
1215  s *= dim[i].extent;
1216  }
1217  return s;
1218  }
1219 
1220  /** A pointer to the element with the lowest address. If all
1221  * strides are positive, equal to the host pointer. */
1222  HALIDE_ALWAYS_INLINE uint8_t *begin() const {
1223  ptrdiff_t index = 0;
1224  for (int i = 0; i < dimensions; i++) {
1225  if (dim[i].stride < 0) {
1226  index += (ptrdiff_t)dim[i].stride * (dim[i].extent - 1); // widen before multiplying: the int32_t product can overflow on large buffers
1227  }
1228  }
1229  return host + index * type.bytes();
1230  }
1231 
1232  /** A pointer to one beyond the element with the highest address. */
1233  HALIDE_ALWAYS_INLINE uint8_t *end() const {
1234  ptrdiff_t index = 0;
1235  for (int i = 0; i < dimensions; i++) {
1236  if (dim[i].stride > 0) {
1237  index += (ptrdiff_t)dim[i].stride * (dim[i].extent - 1); // widen before multiplying: the int32_t product can overflow on large buffers
1238  }
1239  }
1240  index += 1; // step one element past the highest-addressed element
1241  return host + index * type.bytes();
1242  }
1243 
1244  /** The total number of bytes spanned by the data in memory. */
1245  HALIDE_ALWAYS_INLINE size_t size_in_bytes() const {
1246  return (size_t)(end() - begin());
1247  }
1248 
1249  /** A pointer to the element at the given location. */
1250  HALIDE_ALWAYS_INLINE uint8_t *address_of(const int *pos) const {
1251  ptrdiff_t index = 0;
1252  for (int i = 0; i < dimensions; i++) {
1253  index += (ptrdiff_t)dim[i].stride * (pos[i] - dim[i].min); // widen before multiplying: the int32_t product can overflow on large buffers
1254  }
1255  return host + index * type.bytes();
1256  }
1257 
1258  /** Attempt to call device_sync for the buffer. If the buffer
1259  * has no device_interface (or no device_sync), this is a quiet no-op.
1260  * Calling this explicitly should rarely be necessary, except for profiling. */
1261  HALIDE_ALWAYS_INLINE int device_sync(void *ctx = NULL) {
1262  if (device_interface && device_interface->device_sync) {
1263  return device_interface->device_sync(ctx, this);
1264  }
1265  return 0;
1266  }
1267 
1268  /** Check if an input buffer passed to an extern stage is a bounds
1269  * query. Compared to doing the host pointer check directly,
1270  * this both adds clarity to code and will facilitate moving to
1271  * another representation for bounds query arguments. */
1272  HALIDE_ALWAYS_INLINE bool is_bounds_query() const {
1273  return host == NULL && device == 0;
1274  }
1275 
1276 #endif
1277 } halide_buffer_t;
1278 
1279 #ifdef __cplusplus
1280 extern "C" {
1281 #endif
1282 
1283 #ifndef HALIDE_ATTRIBUTE_DEPRECATED
1284 #ifdef HALIDE_ALLOW_DEPRECATED
1285 #define HALIDE_ATTRIBUTE_DEPRECATED(x)
1286 #else
1287 #ifdef _MSC_VER
1288 #define HALIDE_ATTRIBUTE_DEPRECATED(x) __declspec(deprecated(x))
1289 #else
1290 #define HALIDE_ATTRIBUTE_DEPRECATED(x) __attribute__((deprecated(x)))
1291 #endif
1292 #endif
1293 #endif
1294 
1295 /** The old buffer_t, included for compatibility with old code. Don't
1296  * use it. */
1297 #ifndef BUFFER_T_DEFINED
1298 #define BUFFER_T_DEFINED
1299 typedef struct buffer_t {
1302  int32_t extent[4];
1303  int32_t stride[4];
1306  HALIDE_ATTRIBUTE_ALIGN(1) bool host_dirty;
1307  HALIDE_ATTRIBUTE_ALIGN(1) bool dev_dirty;
1308  HALIDE_ATTRIBUTE_ALIGN(1) uint8_t _padding[10 - sizeof(void *)];
1309 } buffer_t;
1310 #endif // BUFFER_T_DEFINED
1311 
1312 /** Copies host pointer, mins, extents, strides, and device state from
1313  * an old-style buffer_t into a new-style halide_buffer_t. The
1314  * dimensions and type fields of the new buffer_t should already be
1315  * set. Returns an error code if the upgrade could not be
1316  * performed. */
1317 extern int halide_upgrade_buffer_t(void *user_context, const char *name,
1318  const buffer_t *old_buf, halide_buffer_t *new_buf);
1319 
1320 /** Copies the host pointer, mins, extents, strides, and device state
1321  * from a halide_buffer_t to a buffer_t. Also sets elem_size. Useful
1322  * for backporting the results of bounds inference. */
1323 extern int halide_downgrade_buffer_t(void *user_context, const char *name,
1324  const halide_buffer_t *new_buf, buffer_t *old_buf);
1325 
1326 /** Copies the dirty flags and device allocation state from a new-style
1327  * halide_buffer_t back to a legacy buffer_t. */
1328 extern int halide_downgrade_buffer_t_device_fields(void *user_context, const char *name,
1329  const halide_buffer_t *new_buf, buffer_t *old_buf);
1330 
1331 /** halide_scalar_value_t is a simple union able to represent all the well-known
1332  * scalar values in a filter argument. Note that it isn't tagged with a type;
1333  * you must ensure you know the proper type before accessing. Most user
1334  * code will never need to create instances of this struct; its primary use
1335  * is to hold def/min/max values in a halide_filter_argument_t. (Note that
1336  * this is conceptually just a union; it's wrapped in a struct to ensure
1337  * that it doesn't get anonymized by LLVM.)
1338  */
1340  union {
1341  bool b;
1350  float f32;
1351  double f64;
1352  void *handle;
1353  } u;
1354 };
1355 
1360 };
1361 
1362 /*
1363  These structs must be robust across different compilers and settings; when
1364  modifying them, strive for the following rules:
1365 
1366  1) All fields are explicitly sized. I.e. must use int32_t and not "int"
1367  2) All fields must land on an alignment boundary that is the same as their size
1368  3) Explicit padding is added to make that so
1369  4) The sizeof the struct is padded out to a multiple of the largest natural size thing in the struct
1370  5) don't forget that 32 and 64 bit pointers are different sizes
1371 */
1372 
1373 /**
1374  * halide_filter_argument_t is essentially a plain-C-struct equivalent to
1375  * Halide::Argument; most user code will never need to create one.
1376  */
1378  const char *name; // name of the argument; will never be null or empty.
1379  int32_t kind; // actually halide_argument_kind_t
1380  int32_t dimensions; // always zero for scalar arguments
1381  struct halide_type_t type;
1382  // These pointers should always be null for buffer arguments,
1383  // and *may* be null for scalar arguments. (A null value means
1384  // there is no def/min/max specified for this argument.)
1388 };
1389 
1391  /** version of this metadata; currently always 0. */
1393 
1394  /** The number of entries in the arguments field. This is always >= 1. */
1396 
1397  /** An array of the filter's input and output arguments; this will never be
1398  * null. The order of arguments is not guaranteed (input and output arguments
1399  * may come in any order); however, it is guaranteed that all arguments
1400  * will have a unique name within a given filter. */
1402 
1403  /** The Target for which the filter was compiled. This is always
1404  * a canonical Target string (ie a product of Target::to_string). */
1405  const char* target;
1406 
1407  /** The function name of the filter. */
1408  const char* name;
1409 };
1410 
1411 /** The functions below here are relevant for pipelines compiled with
1412  * the -profile target flag, which runs a sampling profiler thread
1413  * alongside the pipeline. */
1414 
1415 /** Per-Func state tracked by the sampling profiler. */
1417  /** Total time taken evaluating this Func (in nanoseconds). */
1419 
1420  /** The current memory allocation of this Func. */
1422 
1423  /** The peak memory allocation of this Func. */
1425 
1426  /** The total memory allocation of this Func. */
1428 
1429  /** The peak stack allocation of this Func's threads. */
1431 
1432  /** The average number of thread pool worker threads active while computing this Func. */
1433  uint64_t active_threads_numerator, active_threads_denominator;
1434 
1435  /** The name of this Func. A global constant string. */
1436  const char *name;
1437 
1438  /** The total number of memory allocations of this Func. */
1440 };
1441 
1442 /** Per-pipeline state tracked by the sampling profiler. These exist
1443  * in a linked list. */
1445  /** Total time spent inside this pipeline (in nanoseconds) */
1447 
1448  /** The current memory allocation of funcs in this pipeline. */
1450 
1451  /** The peak memory allocation of funcs in this pipeline. */
1453 
1454  /** The total memory allocation of funcs in this pipeline. */
1456 
1457  /** The average number of thread pool worker threads doing useful
1458  * work while computing this pipeline. */
1459  uint64_t active_threads_numerator, active_threads_denominator;
1460 
1461  /** The name of this pipeline. A global constant string. */
1462  const char *name;
1463 
1464  /** An array containing states for each Func in this pipeline. */
1466 
1467  /** The next pipeline_stats pointer. It's a void * because types
1468  * in the Halide runtime may not currently be recursive. */
1469  void *next;
1470 
1471  /** The number of funcs in this pipeline. */
1473 
1474  /** An internal base id used to identify the funcs in this pipeline. */
1476 
1477  /** The number of times this pipeline has been run. */
1478  int runs;
1479 
1480  /** The total number of samples taken inside of this pipeline. */
1481  int samples;
1482 
1483  /** The total number of memory allocations of funcs in this pipeline. */
1485 };
1486 
1487 /** The global state of the profiler. */
1489  /** Guards access to the fields below. If not locked, the sampling
1490  * profiler thread is free to modify things below (including
1491  * reordering the linked list of pipeline stats). */
1492  struct halide_mutex lock;
1493 
1494  /** The amount of time the profiler thread sleeps between samples
1495  * in milliseconds. Defaults to 1 */
1497 
1498  /** An internal id used for bookkeeping. */
1500 
1501  /** The id of the current running Func. Set by the pipeline, read
1502  * periodically by the profiler thread. */
1504 
1505  /** The number of threads currently doing work. */
1507 
1508  /** A linked list of stats gathered for each pipeline. */
1510 
1511  /** Retrieve remote profiler state. Used so that the sampling
1512  * profiler can follow along with execution that occurs elsewhere,
1513  * e.g. on a DSP. If null, it reads from the int above instead. */
1514  void (*get_remote_profiler_state)(int *func, int *active_workers);
1515 
1516  /** Is the profiler thread running. */
1517  bool started;
1518 };
1519 
1520 /** Profiler func ids with special meanings. */
1521 enum {
1522  /// current_func takes on this value when not inside Halide code
1524  /// Set current_func to this value to tell the profiling thread to
1525  /// halt. It will start up again next time you run a pipeline with
1526  /// profiling enabled.
1528 };
1529 
1530 /** Get a pointer to the global profiler state for programmatic
1531  * inspection. Lock it before using to pause the profiler. */
1533 
1534 /** Get a pointer to the pipeline state associated with pipeline_name.
1535  * This function grabs the global profiler state's lock on entry. */
1536 extern struct halide_profiler_pipeline_stats *halide_profiler_get_pipeline_state(const char *pipeline_name);
1537 
1538 /** Reset all profiler state.
1539  * WARNING: Do NOT call this method while any halide pipeline is
1540  * running; halide_profiler_memory_allocate/free and
1541  * halide_profiler_stack_peak_update update the profiler pipeline's
1542  * state without grabbing the global profiler state's lock. */
1543 extern void halide_profiler_reset();
1544 
1545 /** Print out timing statistics for everything run since the last
1546  * reset. Also happens at process exit. */
1547 extern void halide_profiler_report(void *user_context);
1548 
1549 /// \name "Float16" functions
1550 /// These functions operate on bits (``uint16_t``) representing a half
1551 /// precision floating point number (IEEE-754 2008 binary16).
1552 //{@
1553 
1554 /** Read bits representing a half precision floating point number and return
1555  * the float that represents the same value */
1557 
1558 /** Read bits representing a half precision floating point number and return
1559  * the double that represents the same value */
1561 
1562 // TODO: Conversion functions to half
1563 
1564 //@}
1565 
1566 #ifdef __cplusplus
1567 } // End extern "C"
1568 #endif
1569 
1570 #ifdef __cplusplus
1571 
1572 namespace {
1573 template<typename T> struct check_is_pointer;
1574 template<typename T> struct check_is_pointer<T *> {};
1575 }
1576 
1577 /** Construct the halide equivalent of a C type */
1578 template<typename T>
1579 HALIDE_ALWAYS_INLINE halide_type_t halide_type_of() {
1580  // Create a compile-time error if T is not a pointer (without
1581  // using any includes - this code goes into the runtime).
1582  check_is_pointer<T> check;
1583  (void)check;
1584  return halide_type_t(halide_type_handle, 64);
1585 }
1586 
1587 template<>
1588 HALIDE_ALWAYS_INLINE halide_type_t halide_type_of<float>() {
1589  return halide_type_t(halide_type_float, 32);
1590 }
1591 
1592 template<>
1593 HALIDE_ALWAYS_INLINE halide_type_t halide_type_of<double>() {
1594  return halide_type_t(halide_type_float, 64);
1595 }
1596 
1597 template<>
1598 HALIDE_ALWAYS_INLINE halide_type_t halide_type_of<bool>() {
1599  return halide_type_t(halide_type_uint, 1);
1600 }
1601 
1602 template<>
1603 HALIDE_ALWAYS_INLINE halide_type_t halide_type_of<uint8_t>() {
1604  return halide_type_t(halide_type_uint, 8);
1605 }
1606 
1607 template<>
1608 HALIDE_ALWAYS_INLINE halide_type_t halide_type_of<uint16_t>() {
1609  return halide_type_t(halide_type_uint, 16);
1610 }
1611 
1612 template<>
1613 HALIDE_ALWAYS_INLINE halide_type_t halide_type_of<uint32_t>() {
1614  return halide_type_t(halide_type_uint, 32);
1615 }
1616 
1617 template<>
1618 HALIDE_ALWAYS_INLINE halide_type_t halide_type_of<uint64_t>() {
1619  return halide_type_t(halide_type_uint, 64);
1620 }
1621 
1622 template<>
1623 HALIDE_ALWAYS_INLINE halide_type_t halide_type_of<int8_t>() {
1624  return halide_type_t(halide_type_int, 8);
1625 }
1626 
1627 template<>
1628 HALIDE_ALWAYS_INLINE halide_type_t halide_type_of<int16_t>() {
1629  return halide_type_t(halide_type_int, 16);
1630 }
1631 
1632 template<>
1633 HALIDE_ALWAYS_INLINE halide_type_t halide_type_of<int32_t>() {
1634  return halide_type_t(halide_type_int, 32);
1635 }
1636 
1637 template<>
1638 HALIDE_ALWAYS_INLINE halide_type_t halide_type_of<int64_t>() {
1639  return halide_type_t(halide_type_int, 64);
1640 }
1641 
1642 #endif
1643 
1644 #endif // HALIDE_HALIDERUNTIME_H
int halide_copy_to_host_legacy(void *user_context, struct buffer_t *buf)
Versions of the above functions that accept legacy buffer_t structs.
The Halide runtime encountered an error while trying to detach a native device handle.
The halide_buffer_t * passed to a halide runtime routine is nullptr and this is not allowed...
int halide_device_sync(void *user_context, struct halide_buffer_t *buf)
Wait for current GPU operations to complete.
int halide_error_param_too_large_u64(void *user_context, const char *param_name, uint64_t val, uint64_t max_val)
Various other error conditions.
Disable the bounds querying functionality.
int(* halide_do_par_for_t)(void *, halide_task_t, int, int, uint8_t *)
Set a custom method for performing a parallel for loop.
void halide_error(void *user_context, const char *)
Halide calls this function on runtime errors (for example bounds checking failures).
There is a bug in the Halide compiler.
int32_t dimensions
The length of the coordinates array.
A halide_buffer_t was given with extents that multiply to a number greater than 2^31-1.
struct halide_thread * halide_spawn_thread(void(*f)(void *), void *closure)
Spawn a thread.
int halide_error_param_too_small_f64(void *user_context, const char *param_name, double val, double min_val)
Various other error conditions.
struct halide_profiler_state * halide_profiler_get_state()
Get a pointer to the global profiler state for programmatic inspection.
Cross-platform mutex.
Definition: HalideRuntime.h:97
Unused. (Formerly: Enable the RenderScript runtime.)
int32_t dimensions
The remaining fields are equivalent to those in halide_trace_event_t.
The Halide runtime encountered a host pointer that violated the alignment set for it by way of a call...
void *(* halide_get_library_symbol_t)(void *lib, const char *name)
Halide calls these functions to interact with the underlying system runtime functions.
For Windows compile to MinGW toolset rather than Visual Studio.
decltype((Other) 0==(T) 1) operator==(const Other &a, const GeneratorParam< T > &b)
Equality comparison between GeneratorParam<T> and any type that supports operator== with T...
Definition: Generator.h:899
Set current_func to this value to tell the profiling thread to halt.
Enable x86 (AMD) FMA4 instruction set.
halide_trace_event_code_t
halide_argument_kind_t
The functions below here are relevant for pipelines compiled with the -profile target flag...
A type traits template to provide a halide_handle_cplusplus_type value from a C++ type...
Definition: Type.h:238
void * user_context
Definition: printer.h:31
int halide_error_fold_factor_too_small(void *user_context, const char *func_name, const char *var_name, int fold_factor, const char *loop_name, int required_extent)
Various other error conditions.
A fold_storage directive was used on a dimension that is not accessed in a monotonically increasing o...
int(* device_and_host_malloc)(void *user_context, struct halide_buffer_t *buf, const struct halide_device_interface_t *device_interface)
The type of an mxArray did not match the expected type.
int halide_device_malloc(void *user_context, struct halide_buffer_t *buf, const struct halide_device_interface_t *device_interface)
Allocate device memory to back a halide_buffer_t.
int active_threads
The number of threads currently doing work.
The Halide runtime encountered an error while trying to allocate memory on device.
struct halide_dimension_t halide_dimension_t
int halide_error_buffer_extents_negative(void *user_context, const char *buffer_name, int dimension, int extent)
Various other error conditions.
int halide_error_out_of_memory(void *user_context)
Various other error conditions.
void halide_print(void *user_context, const char *)
Print a message to stderr.
The header of a packet in a binary trace.
int(* detach_native)(void *user_context, struct halide_buffer_t *buf)
int first_free_id
An internal id used for bookkeeping.
Enable hooks for MSAN support.
A fold_storage directive was used with a fold factor that was too small to store all the values of a ...
int num_allocs
The total number of memory allocations of this Func.
halide_do_task_t halide_set_custom_do_task(halide_do_task_t do_task)
If you use the default do_par_for, you can still set a custom handler to perform each individual task...
Per-pipeline state tracked by the sampling profiler.
The Halide runtime encountered an error while trying to wrap a native device handle.
void halide_msan_annotate_buffer_is_initialized_as_destructor(void *user_context, void *buffer)
int halide_error_param_too_small_u64(void *user_context, const char *param_name, uint64_t val, uint64_t min_val)
Various other error conditions.
int halide_error_device_interface_no_device(void *user_context)
Various other error conditions.
int(* device_and_host_free)(void *user_context, struct halide_buffer_t *buf)
int halide_error_unaligned_host_ptr(void *user_context, const char *func_name, int alignment)
Various other error conditions.
A specialize_fail() schedule branch was selected at runtime.
The Halide runtime encountered an error while trying to launch a GPU kernel.
int halide_error_extern_stage_failed(void *user_context, const char *extern_stage_name, int result)
A call to an extern stage failed.
void halide_msan_annotate_buffer_is_initialized(void *user_context, struct halide_buffer_t *buffer)
Mark the data pointed to by the buffer_t as initialized (but not the buffer_t itself), using halide_msan_annotate_memory_is_initialized() for marking.
bool started
Is the profiler thread running.
An uncategorized error occurred.
halide_can_use_target_features_t halide_set_custom_can_use_target_features(halide_can_use_target_features_t)
This function is called internally by Halide in some situations to determine if the current execution...
Generate C++ mangled names for result function, et al.
void(* halide_error_handler_t)(void *, const char *)
Halide calls this function on runtime errors (for example bounds checking failures).
Definition: HalideRuntime.h:86
floating point numbers
Enable the AVX512 features supported by Knight's Landing chips, such as the Xeon Phi x200...
void * halide_malloc(void *user_context, size_t x)
Halide calls these functions to allocate and free memory.
int halide_error_specialize_fail(void *user_context, const char *message)
Various other error conditions.
int halide_error_buffer_is_null(void *user_context, const char *routine)
Various other error conditions.
int halide_device_malloc_legacy(void *user_context, struct buffer_t *buf, const struct halide_device_interface_t *device_interface)
Versions of the above functions that accept legacy buffer_t structs.
halide_free_t halide_set_custom_free(halide_free_t user_free)
Halide calls these functions to allocate and free memory.
int halide_buffer_copy(void *user_context, struct halide_buffer_t *src, const struct halide_device_interface_t *dst_device_interface, struct halide_buffer_t *dst)
Copy data from one buffer to another.
struct halide_profiler_pipeline_stats * pipelines
A linked list of stats gathered for each pipeline.
signed __INT8_TYPE__ int8_t
void halide_default_print(void *user_context, const char *)
Print a message to stderr.
Do not include a copy of the Halide runtime in any generated object file or assembly.
A halide_buffer_t pointer passed in was NULL.
uint64_t memory_total
The total memory allocation of this Func.
int halide_get_trace_file(void *user_context)
Halide calls this to retrieve the file descriptor to write binary trace events to.
int halide_error_constraint_violated(void *user_context, const char *var, int val, const char *constrained_var, int constrained_val)
Various other error conditions.
Enable Hexagon v62 architecture.
EXPORT Expr print(const std::vector< Expr > &values)
Create an Expr that prints out its value whenever it is evaluated.
void *(* halide_load_library_t)(const char *name)
Halide calls these functions to interact with the underlying system runtime functions.
The elem_size field of a halide_buffer_t does not match the size in bytes of the type of that ImagePa...
struct halide_profiler_pipeline_stats * halide_profiler_get_pipeline_state(const char *pipeline_name)
Get a pointer to the pipeline state associated with pipeline_name.
int(* halide_can_use_target_features_t)(uint64_t)
This function is called internally by Halide in some situations to determine if the current execution...
uint64_t flags
flags with various meanings.
halide_dimension_t * dim
The shape of the buffer.
Enable HVX 128 byte mode.
void halide_msan_annotate_memory_is_initialized(void *user_context, const void *ptr, uint64_t len)
Annotate that a given range of memory has been initialized; only used when Target::MSAN is enabled...
const char * name
The name of this pipeline.
int(* halide_task_t)(void *user_context, int task_number, uint8_t *closure)
Define halide_do_par_for to replace the default thread pool implementation.
int32_t halide_debug_to_file(void *user_context, const char *filename, int32_t type_code, struct halide_buffer_t *buf)
Called when debug_to_file is used inside Halide code.
int halide_default_can_use_target_features(uint64_t features)
This is the default implementation of halide_can_use_target_features; it is provided for convenience ...
void halide_shutdown_thread_pool()
Define halide_do_par_for to replace the default thread pool implementation.
#define NULL
int32_t parent_id
The remaining fields are equivalent to those in halide_trace_event_t.
int halide_copy_to_device_legacy(void *user_context, struct buffer_t *buf, const struct halide_device_interface_t *device_interface)
Versions of the above functions that accept legacy buffer_t structs.
void * halide_default_get_library_symbol(void *lib, const char *name)
Halide calls these functions to interact with the underlying system runtime functions.
The Halide runtime encountered an error while trying to copy from host to device. ...
Enable Hexagon v66 architecture.
unsigned __INT8_TYPE__ uint8_t
void(* halide_free_t)(void *, void *)
Halide calls these functions to allocate and free memory.
int halide_error_bad_type(void *user_context, const char *func_name, uint8_t code_given, uint8_t correct_code, uint8_t bits_given, uint8_t correct_bits, uint16_t lanes_given, uint16_t correct_lanes)
Various other error conditions.
int halide_error_buffer_argument_is_null(void *user_context, const char *buffer_name)
Various other error conditions.
halide_filter_argument_t is essentially a plain-C-struct equivalent to Halide::Argument; most user co...
unsigned integers
Enable CUDA compute capability 3.5 (Kepler)
int halide_copy_to_device(void *user_context, struct halide_buffer_t *buf, const struct halide_device_interface_t *device_interface)
Copy image data from host memory to device memory.
int32_t(* halide_trace_t)(void *user_context, const struct halide_trace_event_t *)
halide_get_symbol_t halide_set_custom_get_symbol(halide_get_symbol_t user_get_symbol)
Halide calls these functions to interact with the underlying system runtime functions.
Expr min(FuncRef a, FuncRef b)
Explicit overloads of min and max for FuncRef.
Definition: Func.h:418
__PTRDIFF_TYPE__ ptrdiff_t
Enable the base AVX512 subset supported by all AVX512 architectures. The specific feature sets are AV...
int halide_device_release_crop(void *user_context, struct halide_buffer_t *buf)
Release any resources associated with a cropped view of another buffer.
Enable CUDA compute capability 5.0 (Maxwell)
Enable the OpenCL runtime.
int num_allocs
The total number of memory allocations of funcs in this pipeline.
Generate code for ARMv7s. Only relevant for 32-bit ARM.
int(* device_sync)(void *user_context, struct halide_buffer_t *buf)
int halide_can_use_target_features(uint64_t features)
This function is called internally by Halide in some situations to determine if the current execution...
int halide_error_failed_to_downgrade_buffer_t(void *user_context, const char *input_name, const char *reason)
Various other error conditions.
The host field on an input or output was null, the device field was not zero, and the pipeline tries ...
The Halide runtime encountered an error while trying to copy from device to host. ...
int halide_device_free_legacy(void *user_context, struct buffer_t *buf)
Versions of the above functions that accept legacy buffer_t structs.
A halide_buffer_t was given that spans more than 2GB of memory.
int halide_device_wrap_native(void *user_context, struct halide_buffer_t *buf, uint64_t handle, const struct halide_device_interface_t *device_interface)
Wrap or detach a native device handle, setting the device field and device_interface field as appropr...
Use SSE 4.1 and earlier instructions. Only relevant on x86.
void halide_mutex_lock(struct halide_mutex *mutex)
A basic set of mutex and condition variable functions, which call platform specific code for mutual e...
int32_t version
version of this metadata; currently always 0.
const struct halide_scalar_value_t * def
decltype((Other) 0 !=(T) 1) operator!=(const Other &a, const GeneratorParam< T > &b)
Inequality comparison between GeneratorParam<T> and any type that supports operator!= with T...
Definition: Generator.h:908
debug_to_file failed to open or write to the specified file.
int32_t id
The id of this packet (for the purpose of parent_id).
int32_t elem_size
void halide_free(void *user_context, void *ptr)
Halide calls these functions to allocate and free memory.
Enable the OpenGL runtime.
halide_scalar_value_t is a simple union able to represent all the well-known scalar values in a filte...
int halide_error_requirement_failed(void *user_context, const char *condition, const char *message)
Various other error conditions.
An error occurred when attempting to initialize the Matlab runtime.
const char * target
The Target for which the filter was compiled.
struct halide_buffer_t halide_buffer_t
The raw representation of an image passed around by generated Halide code.
uint64_t _private[8]
Definition: HalideRuntime.h:98
double halide_float16_bits_to_double(uint16_t)
Read bits representing a half precision floating point number and return the double that represents t...
halide_error_code_t
The error codes that may be returned by a Halide pipeline.
int halide_error_host_is_null(void *user_context, const char *func_name)
Various other error conditions.
void * halide_load_library(const char *name)
Halide calls these functions to interact with the underlying system runtime functions.
int halide_do_par_for(void *user_context, halide_task_t task, int min, int size, uint8_t *closure)
Define halide_do_par_for to replace the default thread pool implementation.
A Func was given an explicit bound via Func::bound, but this was not large enough to encompass the re...
uint32_t size
The total size of this packet in bytes.
void halide_profiler_report(void *user_context)
Print out timing statistics for everything run since the last reset.
Enable soft float ABI. This only enables the soft float ABI calling convention, which does not necess...
const char * name
The function name of the filter.
Enable 64-bit buffer indexing to support buffers > 2GB. Ignored if bits != 64.
int halide_error_bad_fold(void *user_context, const char *func_name, const char *var_name, const char *loop_name)
Various other error conditions.
Launch a sampling profiler alongside the Halide pipeline that monitors and reports the runtime used b...
uint64_t time
Total time spent inside this pipeline (in nanoseconds)
Use AVX 2 instructions. Only relevant on x86.
Enable the AVX512 features supported by Skylake Xeon server processors. This adds AVX512-VL...
A compiled pipeline was passed the old deprecated buffer_t struct in bounds inference mode...
Each GPU API provides a halide_device_interface_t struct pointing to the code that manages device all...
halide_do_par_for_t halide_set_custom_do_par_for(halide_do_par_for_t do_par_for)
char * end
Definition: printer.h:30
Buffer has both host and device dirty bits set, which violates a Halide invariant.
int halide_memoization_cache_store(void *user_context, const uint8_t *cache_key, int32_t size, struct halide_buffer_t *realized_bounds, int32_t tuple_count, struct halide_buffer_t **tuple_buffers)
Given a cache key for a memoized result, currently constructed from the Func name and top-level Func ...
opaque pointer type (void *)
const struct halide_scalar_value_t * max
uint64_t dev
current_func takes on this value when not inside Halide code
int halide_error_bad_extern_fold(void *user_context, const char *func_name, int dim, int min, int extent, int valid_min, int fold_factor)
Various other error conditions.
void * next
The next pipeline_stats pointer.
int32_t dimensions
The dimensionality of the buffer.
int first_func_id
An internal base id used to identify the funcs in this pipeline.
int halide_device_free(void *user_context, struct halide_buffer_t *buf)
Free device memory.
void * halide_default_load_library(const char *name)
Halide calls these functions to interact with the underlying system runtime functions.
unsigned __INT32_TYPE__ uint32_t
Enable CUDA compute capability 6.1 (Pascal)
Generate code that will run immediately inside the calling process.
int halide_do_task(void *user_context, halide_task_t f, int idx, uint8_t *closure)
If you use the default do_par_for, you can still set a custom handler to perform each individual task...
char * dst
Definition: printer.h:30
Enable HVX 64 byte mode.
halide_error_handler_t halide_set_error_handler(halide_error_handler_t handler)
Halide calls this function on runtime errors (for example bounds checking failures).
uint64_t stack_peak
The peak stack allocation of this Func's threads.
uint64_t time
Total time taken evaluating this Func (in nanoseconds).
Applying explicit constraints on the size of an input or output buffer shrank the size of that buffer...
Buffer has a non-null device_interface but device is 0, which violates a Halide invariant.
uint64_t memory_current
The current memory allocation of funcs in this pipeline.
uint64_t memory_peak
The peak memory allocation of funcs in this pipeline.
void * halide_default_get_symbol(const char *name)
Halide calls these functions to interact with the underlying system runtime functions.
void halide_memoization_cache_set_size(int64_t size)
Set the soft maximum amount of memory, in bytes, that the LRU cache will use to memoize Func results...
void halide_set_gpu_device(int n)
Selects which gpu device to use.
Enable double support on OpenCL targets.
At least one of the buffer's extents is negative.
The Halide runtime encountered an error while trying to copy from one buffer to another.
float halide_float16_bits_to_float(uint16_t)
Read bits representing a half precision floating point number and return the float that represents th...
int samples
The total number of samples taken inside of this pipeline.
signed __INT64_TYPE__ int64_t
Trace all stores done by the pipeline. Equivalent to calling Func::trace_stores on every non-inlined ...
int32_t * coordinates
For loads and stores, an array which contains the location being accessed.
A call to halide_malloc returned NULL.
int halide_downgrade_buffer_t(void *user_context, const char *name, const halide_buffer_t *new_buf, buffer_t *old_buf)
Copies the host pointer, mins, extents, strides, and device state from a halide_buffer_t to a buffer_...
int32_t halide_default_trace(void *user_context, const struct halide_trace_event_t *event)
halide_get_library_symbol_t halide_set_custom_get_library_symbol(halide_get_library_symbol_t user_get_library_symbol)
Halide calls these functions to interact with the underlying system runtime functions.
int halide_error_buffer_extents_too_large(void *user_context, const char *buffer_name, int64_t actual_size, int64_t max_size)
Various other error conditions.
Disable all runtime checks, for slightly tighter code.
halide_target_feature_t
Optional features a compilation Target can have.
int halide_upgrade_buffer_t(void *user_context, const char *name, const buffer_t *old_buf, halide_buffer_t *new_buf)
Copies host pointer, mins, extents, strides, and device state from an old-style buffer_t into a new-s...
Enable the CUDA runtime. Defaults to compute capability 2.0 (Fermi)
int halide_error_explicit_bounds_too_small(void *user_context, const char *func_name, const char *var_name, int min_bound, int max_bound, int min_required, int max_required)
Various other error conditions.
uint64_t memory_total
The total memory allocation of funcs in this pipeline.
Use VSX instructions. Only relevant on POWERPC.
#define HALIDE_ALWAYS_INLINE
Definition: HalideRuntime.h:27
Trace all loads done by the pipeline. Equivalent to calling Func::trace_loads on every non-inlined Fu...
void halide_set_trace_file(int fd)
Set the file descriptor that Halide should write binary trace events to.
int halide_error_debug_to_file_failed(void *user_context, const char *func, const char *filename, int error_code)
Various other error conditions.
Turn on debug info and output for runtime code.
int32_t halide_trace(void *user_context, const struct halide_trace_event_t *event)
Called when Funcs are marked as trace_load, trace_store, or trace_realization.
int halide_device_crop(void *user_context, const struct halide_buffer_t *src, struct halide_buffer_t *dst)
Give the destination buffer a device allocation which is an alias for the same coordinate range in th...
int halide_default_do_par_for(void *user_context, halide_task_t task, int min, int size, uint8_t *closure)
The default versions of do_task and do_par_for.
A runtime tag for a type in the halide type system.
void halide_memoization_cache_release(void *user_context, void *host)
If halide_memoization_cache_lookup succeeds, halide_memoization_cache_release must be called to signa...
int halide_memoization_cache_lookup(void *user_context, const uint8_t *cache_key, int32_t size, struct halide_buffer_t *realized_bounds, int32_t tuple_count, struct halide_buffer_t **tuple_buffers)
Given a cache key for a memoized result, currently constructed from the Func name and top-level Func ...
int halide_downgrade_buffer_t_device_fields(void *user_context, const char *name, const halide_buffer_t *new_buf, buffer_t *old_buf)
Copies the dirty flags and device allocation state from a new buffer_t back to a legacy buffer_t...
A folded buffer was passed to an extern stage, but the region touched wraps around the fold boundary...
void * halide_get_symbol(const char *name)
Halide calls these functions to interact with the underlying system runtime functions.
Enable x86 16-bit float support.
void halide_profiler_reset()
Reset all profiler state.
A sentinel. Every target is considered to have this feature, and setting this feature does nothing...
Use POWER ISA 2.07 new instructions. Only relevant on POWERPC.
A constraint on a size or stride of an input or output buffer was not met by the halide_buffer_t pass...
int halide_device_detach_native(void *user_context, struct halide_buffer_t *buf)
Wrap or detach a native device handle, setting the device field and device_interface field as appropr...
void * value
If the event type is a load or a store, this points to the value being loaded or stored.
void halide_default_error(void *user_context, const char *)
Halide calls this function on runtime errors (for example bounds checking failures).
int halide_error_failed_to_upgrade_buffer_t(void *user_context, const char *input_name, const char *reason)
Various other error conditions.
int halide_default_do_task(void *user_context, halide_task_t f, int idx, uint8_t *closure)
The default versions of do_task and do_par_for.
Avoid using NEON instructions. Only relevant for 32-bit ARM.
const struct halide_device_interface_t * device_interface
The interface used to interpret the above handle.
const char * func
The name of the Func or Pipeline that this event refers to.
uint64_t active_threads_numerator
The average number of thread pool worker threads active while computing this Func.
unsigned __INT16_TYPE__ uint16_t
char * buf
Definition: printer.h:30
On every floating point store, set the last bit of the mantissa to zero. Pipelines for which the outp...
int runs
The number of times this pipeline has been run.
uint8_t * host
A pointer to the start of the data in main memory.
void halide_device_release(void *user_context, const struct halide_device_interface_t *device_interface)
Release all data associated with the given device interface, in particular all resources (memory...
int halide_copy_to_host(void *user_context, struct halide_buffer_t *buf)
Copy image data from device memory to host memory.
const struct halide_device_interface_impl_t * impl
Generated code takes a user_context pointer as first argument.
int halide_error_buffer_allocation_too_large(void *user_context, const char *buffer_name, uint64_t allocation_size, uint64_t max_size)
Various other error conditions.
void * padding
Pads the buffer up to a multiple of 8 bytes.
A pipeline would access memory outside of the halide_buffer_t passed in.
Enable x86 FMA instruction.
int halide_create_temp_file(void *user_context, const char *prefix, const char *suffix, char *path_buf, size_t path_buf_size)
Create a unique file with a name of the form prefixXXXXXsuffix in an arbitrary (but writable) directo...
The Halide runtime encountered an error while trying to free a device allocation. ...
__SIZE_TYPE__ size_t
int halide_shutdown_trace()
If tracing is writing to a file.
void *(* halide_get_symbol_t)(const char *name)
Halide calls these functions to interact with the underlying system runtime functions.
void halide_join_thread(struct halide_thread *)
Join a thread.
uint64_t memory_current
The current memory allocation of this Func.
Trace all realizations done by the pipeline. Equivalent to calling Func::trace_realizations on every ...
void halide_mutex_unlock(struct halide_mutex *mutex)
A basic set of mutex and condition variable functions, which call platform specific code for mutual e...
halide_type_code_t
Types in the halide type system.
int(* copy_to_host)(void *user_context, struct halide_buffer_t *buf)
uint64_t device
A device-handle for e.g.
An operation on a buffer required an allocation on a particular device interface, but a device alloca...
Use AVX 1 instructions. Only relevant on x86.
halide_print_t halide_set_custom_print(halide_print_t print)
Print a message to stderr.
int halide_error_param_too_large_f64(void *user_context, const char *param_name, double val, double max_val)
Various other error conditions.
halide_trace_t halide_set_custom_trace(halide_trace_t trace)
int halide_error_host_and_device_dirty(void *user_context)
Various other error conditions.
The global state of the profiler.
int32_t num_arguments
The number of entries in the arguments field.
int sleep_time
The amount of time the profiler thread sleeps between samples in milliseconds.
void * halide_get_library_symbol(void *lib, const char *name)
Halide calls these functions to interact with the underlying system runtime functions.
void halide_memoization_cache_cleanup()
Free all memory and resources associated with the memoization cache.
Generate a mexFunction compatible with Matlab mex libraries. See tools/mex_halide.m.
int halide_error_no_device_interface(void *user_context)
Various other error conditions.
int halide_error_constraints_make_required_region_smaller(void *user_context, const char *buffer_name, int dimension, int constrained_min, int constrained_extent, int required_min, int required_extent)
Various other error conditions.
uint8_t * host
The Halide runtime encountered an error while trying to synchronize with a device.
void halide_default_free(void *user_context, void *ptr)
Halide calls these functions to allocate and free memory.
Enable CUDA compute capability 3.2 (Tegra K1)
halide_load_library_t halide_set_custom_load_library(halide_load_library_t user_load_library)
Halide calls these functions to interact with the underlying system runtime functions.
int halide_error_param_too_small_i64(void *user_context, const char *param_name, int64_t val, int64_t min_val)
Various other error conditions.
Enable Hexagon v65 architecture.
Enable CUDA compute capability 3.0 (Kepler)
Cropping a buffer failed for some other reason.
User-specified require() expression was not satisfied.
int32_t value_index
The remaining fields are equivalent to those in halide_trace_event_t.
Attempted to make cropped alias of a buffer with a device field, but the device_interface does not su...
The raw representation of an image passed around by generated Halide code.
int halide_device_sync_legacy(void *user_context, struct buffer_t *buf)
Versions of the above functions that accept legacy buffer_t structs.
const struct halide_filter_argument_t * arguments
An array of the filter's input and output arguments; this will never be null.
int current_func
The id of the current running Func.
unsigned __INT64_TYPE__ uint64_t
Buffer has a non-zero device but no device interface, which violates a Halide invariant.
void(* halide_print_t)(void *, const char *)
Print a message to stderr.
Definition: HalideRuntime.h:72
A scalar parameter passed in was greater than its maximum declared value.
int halide_error_bounds_inference_call_failed(void *user_context, const char *extern_stage_name, int result)
Halide calls the functions below on various error conditions.
uint64_t memory_peak
The peak memory allocation of this Func.
uint64_t active_threads_numerator
The average number of thread pool worker threads doing useful work while computing this pipeline...
struct halide_profiler_func_stats * funcs
An array containing states for each Func in this pipeline.
Enable OpenGL Compute runtime.
int halide_error_param_too_large_i64(void *user_context, const char *param_name, int64_t val, int64_t max_val)
Various other error conditions.
int halide_error_access_out_of_bounds(void *user_context, const char *func_name, int dimension, int min_touched, int max_touched, int min_valid, int max_valid)
Various other error conditions.
const struct halide_scalar_value_t * min
Enable the (Apple) Metal runtime.
Enable the AVX512 features expected to be supported by future Cannonlake processors. This includes all of the Skylake features, plus AVX512-IFMA and AVX512-VBMI.
halide_malloc_t halide_set_custom_malloc(halide_malloc_t user_malloc)
Halide calls these functions to allocate and free memory.
halide_buffer_flags
There was no error.
int(* halide_do_task_t)(void *, halide_task_t, int, uint8_t *)
If you use the default do_par_for, you can still set a custom handler to perform each individual task...
A compiled pipeline was passed the old deprecated buffer_t struct, and it could not be upgraded to a ...
signed __INT32_TYPE__ int32_t
int halide_get_gpu_device(void *user_context)
Halide calls this to get the desired halide gpu device setting.
void halide_mutex_destroy(struct halide_mutex *mutex)
A basic set of mutex and condition variable functions, which call platform specific code for mutual e...
int32_t value_index
If this was a load or store of a Tuple-valued Func, this is which tuple element was accessed...
int num_funcs
The number of funcs in this pipeline.
signed integers
const char * name
The name of this Func.
signed __INT16_TYPE__ int16_t
#define HALIDE_ATTRIBUTE_ALIGN(x)
int halide_set_num_threads(int n)
Set the number of threads used by Halide's thread pool.
A scalar parameter passed in was smaller than its minimum declared value.
int(* device_free)(void *user_context, struct halide_buffer_t *buf)
void * halide_default_malloc(void *user_context, size_t x)
Halide calls these functions to allocate and free memory.
void *(* halide_malloc_t)(void *, size_t)
Halide calls these functions to allocate and free memory.