Halide 19.0.0
Halide compiler and libraries
HalideBuffer.h
1/** \file
2 * Defines a Buffer type that wraps halide_buffer_t and adds
3 * functionality, and methods for more conveniently iterating over the
4 * samples in a halide_buffer_t outside of Halide code. */
5
6#ifndef HALIDE_RUNTIME_BUFFER_H
7#define HALIDE_RUNTIME_BUFFER_H
8
9#include <algorithm>
10#include <atomic>
11#include <cassert>
12#include <cstdint>
13#include <cstdlib>
14#include <cstring>
15#include <limits>
16#include <memory>
17#include <vector>
18
19#ifdef __APPLE__
20#include <AvailabilityVersions.h>
21#include <TargetConditionals.h>
22#endif
23
24#if defined(__has_feature)
25#if __has_feature(memory_sanitizer)
26#include <sanitizer/msan_interface.h>
27#endif
28#endif
29
30#include "HalideRuntime.h"
31
32#ifdef _MSC_VER
33#include <malloc.h>
34#define HALIDE_ALLOCA _alloca
35#else
36#define HALIDE_ALLOCA __builtin_alloca
37#endif
38
39// gcc 5.1 has a false positive warning on this code
40#if __GNUC__ == 5 && __GNUC_MINOR__ == 1
41#pragma GCC diagnostic ignored "-Warray-bounds"
42#endif
43
44#ifndef HALIDE_RUNTIME_BUFFER_CHECK_INDICES
45#define HALIDE_RUNTIME_BUFFER_CHECK_INDICES 0
46#endif
47
48#ifndef HALIDE_RUNTIME_BUFFER_ALLOCATION_ALIGNMENT
49// Conservatively align buffer allocations to 128 bytes by default.
50// This is enough alignment for all the platforms currently in use.
51// Redefine this in your compiler settings if you desire more/less alignment.
52#define HALIDE_RUNTIME_BUFFER_ALLOCATION_ALIGNMENT 128
53#endif
54
56 "HALIDE_RUNTIME_BUFFER_ALLOCATION_ALIGNMENT must be a power of 2.");
57
58// Unfortunately, not all C++17 runtimes support aligned_alloc
59// (it may depend on OS/SDK version); this is provided as an opt-out
60// if you are compiling on a platform that doesn't provide a (good)
61// implementation. (Note that we actually use the C11 `::aligned_alloc()`
62// rather than the C++17 `std::aligned_alloc()` because at least one platform
63// we found supports the former but not the latter.)
64#ifndef HALIDE_RUNTIME_BUFFER_USE_ALIGNED_ALLOC
65
66// clang-format off
67#ifdef _MSC_VER
68
69 // MSVC doesn't implement aligned_alloc(), even in C++17 mode, and
70 // has stated they probably never will, so, always default it off here.
71 #define HALIDE_RUNTIME_BUFFER_USE_ALIGNED_ALLOC 0
72
73#elif defined(__ANDROID_API__) && __ANDROID_API__ < 28
74
75 // Android doesn't provide aligned_alloc until API 28
76 #define HALIDE_RUNTIME_BUFFER_USE_ALIGNED_ALLOC 0
77
78#elif defined(__APPLE__)
79
80 #if TARGET_OS_OSX && (__MAC_OS_X_VERSION_MIN_REQUIRED < __MAC_10_15)
81
82 // macOS doesn't provide aligned_alloc until 10.15
83 #define HALIDE_RUNTIME_BUFFER_USE_ALIGNED_ALLOC 0
84
85 #elif TARGET_OS_IPHONE && (__IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_14_0)
86
87 // iOS doesn't provide aligned_alloc until 14.0
88 #define HALIDE_RUNTIME_BUFFER_USE_ALIGNED_ALLOC 0
89
90 #else
91
92 // Assume it's ok on all other Apple targets
93 #define HALIDE_RUNTIME_BUFFER_USE_ALIGNED_ALLOC 1
94
95 #endif
96
97#else
98
99 #if defined(__GLIBCXX__) && !defined(_GLIBCXX_HAVE_ALIGNED_ALLOC)
100
101 // ARM GNU-A baremetal compiler doesn't provide aligned_alloc as of 12.2
102 #define HALIDE_RUNTIME_BUFFER_USE_ALIGNED_ALLOC 0
103
104 #else
105
106 // Not Windows, Android, or Apple: just assume it's ok
107 #define HALIDE_RUNTIME_BUFFER_USE_ALIGNED_ALLOC 1
108
109 #endif
110
111#endif
112// clang-format on
113
114#endif // HALIDE_RUNTIME_BUFFER_USE_ALIGNED_ALLOC
115
116namespace Halide {
117namespace Runtime {
118
119// Forward-declare our Buffer class
120template<typename T, int Dims, int InClassDimStorage>
121class Buffer;
122
123// A helper to check if a parameter pack is entirely implicitly
124// int-convertible to use with std::enable_if
125template<typename... Args>
126struct AllInts : std::false_type {};
127
128template<>
129struct AllInts<> : std::true_type {};
130
131template<typename T, typename... Args>
132struct AllInts<T, Args...> {
133 static const bool value = std::is_convertible<T, int>::value && AllInts<Args...>::value;
134};
135
136// Floats and doubles are technically implicitly int-convertible, but
137// doing so produces a warning we treat as an error, so just disallow
138// it here.
139template<typename... Args>
140struct AllInts<float, Args...> : std::false_type {};
141
142template<typename... Args>
143struct AllInts<double, Args...> : std::false_type {};
144
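// Illustrative sketch of what AllInts evaluates to (editor example, not part of the API):
//   static_assert(AllInts<int, long, char>::value, "all implicitly int-convertible");
//   static_assert(!AllInts<int, float>::value, "floats are deliberately excluded");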
145namespace Internal {
146// A helper to detect if there are any zeros in a container
147template<typename Container>
148bool any_zero(const Container &c) {
149 for (int i : c) {
150 if (i == 0) {
151 return true;
152 }
153 }
154 return false;
155}
156
157struct DefaultAllocatorFns {
158 static inline void *(*default_allocate_fn)(size_t) = nullptr;
159 static inline void (*default_deallocate_fn)(void *) = nullptr;
160};
161} // namespace Internal
162
163/** A struct acting as a header for allocations owned by the Buffer
164 * class itself. */
165struct AllocationHeader {
166 void (*deallocate_fn)(void *);
167 std::atomic<int> ref_count;
168
169 // Note that ref_count always starts at 1
170 explicit AllocationHeader(void (*deallocate_fn)(void *))
171 : deallocate_fn(deallocate_fn), ref_count(1) {
172 }
173};
174
175/** This indicates how to deallocate the device for a Halide::Runtime::Buffer. */
176enum struct BufferDeviceOwnership : int {
177 Allocated, ///> halide_device_free will be called when device ref count goes to zero
178 WrappedNative, ///> halide_device_detach_native will be called when device ref count goes to zero
179 Unmanaged, ///> No free routine will be called when device ref count goes to zero
180 AllocatedDeviceAndHost, ///> Call device_and_host_free when DevRefCount goes to zero.
181 Cropped, ///> Call halide_device_release_crop when DevRefCount goes to zero.
182};
183
184/** A similar struct for managing device allocations. */
185struct DeviceRefCount {
186 // This is only ever constructed when there's something to manage,
187 // so start at one.
188 std::atomic<int> count{1};
189 BufferDeviceOwnership ownership{BufferDeviceOwnership::Allocated};
190};
191
192constexpr int AnyDims = -1;
193
194/** A templated Buffer class that wraps halide_buffer_t and adds
195 * functionality. When using Halide from C++, this is the preferred
196 * way to create input and output buffers. The overhead of using this
197 * class relative to a naked halide_buffer_t is minimal - it uses another
198 * ~16 bytes on the stack, and does no dynamic allocations when using
199 * it to represent existing memory of a known maximum dimensionality.
200 *
201 * The template parameter T is the element type. For buffers where the
202 * element type is unknown, or may vary, use void or const void.
203 *
204 * The template parameter Dims is the number of dimensions. For buffers where
205 * the dimensionality is unknown at compile time, or may vary, use AnyDims.
206 *
207 * InClassDimStorage is the maximum number of dimensions that can be represented
208 * using space inside the class itself. Set it to the maximum dimensionality
209 * you expect this buffer to be. If the actual dimensionality exceeds
210 * this, heap storage is allocated to track the shape of the buffer.
211 * InClassDimStorage defaults to 4, which should cover nearly all usage.
212 *
213 * The class optionally allocates and owns memory for the image using
214 * a shared pointer allocated with the provided allocator. If they are
215 * null, malloc and free are used. Any device-side allocation is
216 * considered as owned if and only if the host-side allocation is
217 * owned. */
218template<typename T = void,
219 int Dims = AnyDims,
220 int InClassDimStorage = (Dims == AnyDims ? 4 : std::max(Dims, 1))>
221class Buffer {
222 /** The underlying halide_buffer_t */
223 halide_buffer_t buf = {};
224
225 /** Some in-class storage for shape of the dimensions. */
226 halide_dimension_t shape[InClassDimStorage];
227
228 /** The allocation owned by this Buffer. NULL if the Buffer does not
229 * own the memory. */
230 AllocationHeader *alloc = nullptr;
231
232 /** A reference count for the device allocation owned by this
233 * buffer. */
234 mutable DeviceRefCount *dev_ref_count = nullptr;
235
236 /** True if T is of type void or const void */
237 static const bool T_is_void = std::is_same<typename std::remove_const<T>::type, void>::value;
238
239 /** A type function that adds a const qualifier if T is a const type. */
240 template<typename T2>
241 using add_const_if_T_is_const = typename std::conditional<std::is_const<T>::value, const T2, T2>::type;
242
243 /** T unless T is (const) void, in which case (const)
244 * uint8_t. Useful for providing return types for operator() */
245 using not_void_T = typename std::conditional<T_is_void,
246 add_const_if_T_is_const<uint8_t>,
247 T>::type;
248
249 /** T with constness removed. Useful for return type of copy(). */
250 using not_const_T = typename std::remove_const<T>::type;
251
252 /** The type the elements are stored as. Equal to not_void_T
253 * unless T is a pointer, in which case uint64_t. Halide stores
254 * all pointer types as uint64s internally, even on 32-bit
255 * systems. */
256 using storage_T = typename std::conditional<std::is_pointer<T>::value, uint64_t, not_void_T>::type;
257
258public:
259 /** True if the Halide type is not void (or const void). */
260 static constexpr bool has_static_halide_type = !T_is_void;
261
262 /** Get the Halide type of T. Callers should not use the result if
263 * has_static_halide_type is false. */
264 static constexpr halide_type_t static_halide_type() {
265 return halide_type_of<typename std::remove_cv<not_void_T>::type>();
266 }
267
268 /** Does this Buffer own the host memory it refers to? */
269 bool owns_host_memory() const {
270 return alloc != nullptr;
271 }
272
273 static constexpr bool has_static_dimensions = (Dims != AnyDims);
274
275 /** Callers should not use the result if
276 * has_static_dimensions is false. */
277 static constexpr int static_dimensions() {
278 return Dims;
279 }
280
281 static_assert(!has_static_dimensions || static_dimensions() >= 0);
282
283private:
284 /** Increment the reference count of any owned allocation */
285 void incref() const {
286 if (owns_host_memory()) {
287 alloc->ref_count++;
288 }
289 if (buf.device) {
290 if (!dev_ref_count) {
291 // I seem to have a non-zero dev field but no
292 // reference count for it. I must have been given a
293 // device allocation by a Halide pipeline, and have
294 // never been copied from since. Take sole ownership
295 // of it.
296 dev_ref_count = new DeviceRefCount;
297 }
298 dev_ref_count->count++;
299 }
300 }
301
302 // Note that this is called "cropped" but can also encompass a slice/embed
303 // operation as well.
304 struct DevRefCountCropped : DeviceRefCount {
305 // We will only store Buffers that have a dynamic number of dimensions.
306 // Buffers that are cropped or sliced from need to first be converted to
307 // one with a variable number of dimensions. This is required because we cannot possibly
308 // know what the actual dimensionality is of the buffer this is a
309 // crop or slice from. Since cropping a sliced buffer is also possible,
310 // no optimizations can be made for cropped buffers either.
311 Buffer<T, AnyDims> cropped_from;
312 explicit DevRefCountCropped(const Buffer<T, AnyDims> &cropped_from)
313 : cropped_from(cropped_from) {
314 ownership = BufferDeviceOwnership::Cropped;
315 }
316 };
317
318 /** Setup the device ref count for a buffer to indicate it is a crop (or slice, embed, etc) of cropped_from */
319 void crop_from(const Buffer<T, AnyDims> &cropped_from) {
320 assert(dev_ref_count == nullptr);
321 dev_ref_count = new DevRefCountCropped(cropped_from);
322 }
323
324 /** Decrement the reference count of any owned allocation and free host
325 * and device memory if it hits zero. Sets alloc to nullptr. */
326 void decref(bool device_only = false) {
327 if (owns_host_memory() && !device_only) {
328 int new_count = --(alloc->ref_count);
329 if (new_count == 0) {
330 void (*fn)(void *) = alloc->deallocate_fn;
331 alloc->~AllocationHeader();
332 fn(alloc);
333 }
334 buf.host = nullptr;
335 alloc = nullptr;
336 set_host_dirty(false);
337 }
338 int new_count = 0;
339 if (dev_ref_count) {
340 new_count = --(dev_ref_count->count);
341 }
342 if (new_count == 0) {
343 if (buf.device) {
344 assert(!(alloc && device_dirty()) &&
345 "Implicitly freeing a dirty device allocation while a host allocation still lives. "
346 "Call device_free explicitly if you want to drop dirty device-side data. "
347 "Call copy_to_host explicitly if you want the data copied to the host allocation "
348 "before the device allocation is freed.");
349 int result = halide_error_code_success;
350 if (dev_ref_count && dev_ref_count->ownership == BufferDeviceOwnership::WrappedNative) {
351 result = buf.device_interface->detach_native(nullptr, &buf);
352 } else if (dev_ref_count && dev_ref_count->ownership == BufferDeviceOwnership::AllocatedDeviceAndHost) {
353 result = buf.device_interface->device_and_host_free(nullptr, &buf);
354 } else if (dev_ref_count && dev_ref_count->ownership == BufferDeviceOwnership::Cropped) {
355 result = buf.device_interface->device_release_crop(nullptr, &buf);
356 } else if (dev_ref_count == nullptr || dev_ref_count->ownership == BufferDeviceOwnership::Allocated) {
357 result = buf.device_interface->device_free(nullptr, &buf);
358 }
359 // No reasonable way to return the error, but we can at least assert-fail in debug builds.
360 assert((result == halide_error_code_success) && "device_interface call returned a nonzero result in Buffer::decref()");
361 (void)result;
362 }
363 if (dev_ref_count) {
364 if (dev_ref_count->ownership == BufferDeviceOwnership::Cropped) {
365 delete (DevRefCountCropped *)dev_ref_count;
366 } else {
367 delete dev_ref_count;
368 }
369 }
370 }
371 dev_ref_count = nullptr;
372 buf.device = 0;
373 buf.device_interface = nullptr;
374 }
375
376 void free_shape_storage() {
377 if (buf.dim != shape) {
378 delete[] buf.dim;
379 buf.dim = nullptr;
380 }
381 }
382
383 template<int DimsSpecified>
384 void make_static_shape_storage() {
385 static_assert(Dims == AnyDims || Dims == DimsSpecified,
386 "Number of arguments to Buffer() does not match static dimensionality");
387 buf.dimensions = DimsSpecified;
388 if constexpr (Dims == AnyDims) {
389 if constexpr (DimsSpecified <= InClassDimStorage) {
390 buf.dim = shape;
391 } else {
392 static_assert(DimsSpecified >= 1);
393 buf.dim = new halide_dimension_t[DimsSpecified];
394 }
395 } else {
396 static_assert(InClassDimStorage >= Dims);
397 buf.dim = shape;
398 }
399 }
400
401 void make_shape_storage(const int dimensions) {
402 if (Dims != AnyDims && Dims != dimensions) {
403 assert(false && "Number of arguments to Buffer() does not match static dimensionality");
404 }
405 // This should usually be inlined, so if dimensions is statically known,
406 // we can skip the call to new
407 buf.dimensions = dimensions;
408 buf.dim = (dimensions <= InClassDimStorage) ? shape : new halide_dimension_t[dimensions];
409 }
410
411 void copy_shape_from(const halide_buffer_t &other) {
412 // All callers of this ensure that buf.dimensions == other.dimensions.
413 make_shape_storage(other.dimensions);
414 std::copy(other.dim, other.dim + other.dimensions, buf.dim);
415 }
416
417 template<typename T2, int D2, int S2>
418 void move_shape_from(Buffer<T2, D2, S2> &&other) {
419 if (other.shape == other.buf.dim) {
420 copy_shape_from(other.buf);
421 } else {
422 buf.dim = other.buf.dim;
423 other.buf.dim = nullptr;
424 }
425 other.buf = halide_buffer_t();
426 }
427
428 /** Initialize the shape from a halide_buffer_t. */
429 void initialize_from_buffer(const halide_buffer_t &b,
430 BufferDeviceOwnership ownership) {
431 memcpy(&buf, &b, sizeof(halide_buffer_t));
432 copy_shape_from(b);
433 if (b.device) {
434 dev_ref_count = new DeviceRefCount;
435 dev_ref_count->ownership = ownership;
436 }
437 }
438
439 /** Initialize the shape from an array of ints */
440 void initialize_shape(const int *sizes) {
441 for (int i = 0; i < buf.dimensions; i++) {
442 buf.dim[i].min = 0;
443 buf.dim[i].extent = sizes[i];
444 if (i == 0) {
445 buf.dim[i].stride = 1;
446 } else {
447 buf.dim[i].stride = buf.dim[i - 1].stride * buf.dim[i - 1].extent;
448 }
449 }
450 }
451
452 /** Initialize the shape from a vector of extents */
453 void initialize_shape(const std::vector<int> &sizes) {
454 assert(buf.dimensions == (int)sizes.size());
455 initialize_shape(sizes.data());
456 }
457
458 /** Initialize the shape from the static shape of an array */
459 template<typename Array, size_t N>
460 void initialize_shape_from_array_shape(int next, Array (&vals)[N]) {
461 buf.dim[next].min = 0;
462 buf.dim[next].extent = (int)N;
463 if (next == 0) {
464 buf.dim[next].stride = 1;
465 } else {
466 initialize_shape_from_array_shape(next - 1, vals[0]);
467 buf.dim[next].stride = buf.dim[next - 1].stride * buf.dim[next - 1].extent;
468 }
469 }
470
471 /** Base case for the template recursion above. */
472 template<typename T2>
473 void initialize_shape_from_array_shape(int, const T2 &) {
474 }
475
476 /** Get the dimensionality of a multi-dimensional C array */
477 template<typename Array, size_t N>
478 static int dimensionality_of_array(Array (&vals)[N]) {
479 return dimensionality_of_array(vals[0]) + 1;
480 }
481
482 template<typename T2>
483 static int dimensionality_of_array(const T2 &) {
484 return 0;
485 }
486
487 /** Get the underlying halide_type_t of an array's element type. */
488 template<typename Array, size_t N>
489 static halide_type_t scalar_type_of_array(Array (&vals)[N]) {
490 return scalar_type_of_array(vals[0]);
491 }
492
493 template<typename T2>
494 static halide_type_t scalar_type_of_array(const T2 &) {
495 return halide_type_of<typename std::remove_cv<T2>::type>();
496 }
497
498 /** Crop a single dimension without handling device allocation. */
499 void crop_host(int d, int min, int extent) {
500 assert(dim(d).min() <= min);
501 assert(dim(d).max() >= min + extent - 1);
502 ptrdiff_t shift = min - dim(d).min();
503 if (buf.host != nullptr) {
504 buf.host += (shift * dim(d).stride()) * type().bytes();
505 }
506 buf.dim[d].min = min;
507 buf.dim[d].extent = extent;
508 }
509
510 /** Crop as many dimensions as are in rect, without handling device allocation. */
511 void crop_host(const std::vector<std::pair<int, int>> &rect) {
512 assert(rect.size() <= static_cast<decltype(rect.size())>(std::numeric_limits<int>::max()));
513 int limit = (int)rect.size();
514 assert(limit <= dimensions());
515 for (int i = 0; i < limit; i++) {
516 crop_host(i, rect[i].first, rect[i].second);
517 }
518 }
519
520 void complete_device_crop(Buffer<T, Dims, InClassDimStorage> &result_host_cropped) const {
521 assert(buf.device_interface != nullptr);
522 if (buf.device_interface->device_crop(nullptr, &this->buf, &result_host_cropped.buf) == halide_error_code_success) {
523 // TODO: Figure out what to do if dev_ref_count is nullptr. Should incref logic run here?
524 // is it possible to get to this point without incref having run at least once since
525 // the device field was set? (I.e. in the internal logic of crop. incref might have been
526 // called.)
527 if (dev_ref_count != nullptr && dev_ref_count->ownership == BufferDeviceOwnership::Cropped) {
528 result_host_cropped.crop_from(((DevRefCountCropped *)dev_ref_count)->cropped_from);
529 } else {
530 result_host_cropped.crop_from(*this);
531 }
532 }
533 }
534
535 /** slice a single dimension without handling device allocation. */
536 void slice_host(int d, int pos) {
537 static_assert(Dims == AnyDims);
538 assert(dimensions() > 0);
539 assert(d >= 0 && d < dimensions());
540 assert(pos >= dim(d).min() && pos <= dim(d).max());
541 buf.dimensions--;
542 ptrdiff_t shift = pos - buf.dim[d].min;
543 if (buf.host != nullptr) {
544 buf.host += (shift * buf.dim[d].stride) * type().bytes();
545 }
546 for (int i = d; i < buf.dimensions; i++) {
547 buf.dim[i] = buf.dim[i + 1];
548 }
549 buf.dim[buf.dimensions] = {0, 0, 0};
550 }
551
552 void complete_device_slice(Buffer<T, AnyDims, InClassDimStorage> &result_host_sliced, int d, int pos) const {
553 assert(buf.device_interface != nullptr);
554 if (buf.device_interface->device_slice(nullptr, &this->buf, d, pos, &result_host_sliced.buf) == halide_error_code_success) {
555 // TODO: Figure out what to do if dev_ref_count is nullptr. Should incref logic run here?
556 // is it possible to get to this point without incref having run at least once since
557 // the device field was set? (I.e. in the internal logic of slice. incref might have been
558 // called.)
559 if (dev_ref_count != nullptr && dev_ref_count->ownership == BufferDeviceOwnership::Cropped) {
560 // crop_from() is correct here, despite the fact that we are slicing.
561 result_host_sliced.crop_from(((DevRefCountCropped *)dev_ref_count)->cropped_from);
562 } else {
563 // crop_from() is correct here, despite the fact that we are slicing.
564 result_host_sliced.crop_from(*this);
565 }
566 }
567 }
568
569public:
570 typedef T ElemType;
571
572 /** Read-only access to the shape */
573 class Dimension {
574 const halide_dimension_t &d;
575
576 public:
577 /** The lowest coordinate in this dimension */
578 HALIDE_ALWAYS_INLINE int min() const {
579 return d.min;
580 }
581
582 /** The number of elements in memory you have to step over to
583 * increment this coordinate by one. */
584 HALIDE_ALWAYS_INLINE int stride() const {
585 return d.stride;
586 }
587
588 /** The extent of the image along this dimension */
589 HALIDE_ALWAYS_INLINE int extent() const {
590 return d.extent;
591 }
592
593 /** The highest coordinate in this dimension */
594 HALIDE_ALWAYS_INLINE int max() const {
595 return min() + extent() - 1;
596 }
597
598 /** An iterator class, so that you can iterate over
599 * coordinates in a dimensions using a range-based for loop. */
600 struct iterator {
601 int val;
602 int operator*() const {
603 return val;
604 }
605 bool operator!=(const iterator &other) const {
606 return val != other.val;
607 }
608 iterator &operator++() {
609 val++;
610 return *this;
611 }
612 };
613
614 /** An iterator that points to the min coordinate */
615 HALIDE_ALWAYS_INLINE iterator begin() const {
616 return {min()};
617 }
618
619 /** An iterator that points to one past the max coordinate */
620 HALIDE_ALWAYS_INLINE iterator end() const {
621 return {min() + extent()};
622 }
623
624 explicit Dimension(const halide_dimension_t &dim)
625 : d(dim) {
626 }
627 };
628
629 /** Access the shape of the buffer */
630 HALIDE_ALWAYS_INLINE Dimension dim(int i) const {
631 assert(i >= 0 && i < this->dimensions());
632 return Dimension(buf.dim[i]);
633 }
634
635 /** Access to the mins, strides, extents. Will be deprecated. Do not use. */
636 // @{
637 int min(int i) const {
638 return dim(i).min();
639 }
640 int extent(int i) const {
641 return dim(i).extent();
642 }
643 int stride(int i) const {
644 return dim(i).stride();
645 }
646 // @}
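 /* Illustrative usage sketch (assumes this header is included): Dimension's
 * begin()/end() make each dimension iterable with a range-based for loop.
 * \code
 * Halide::Runtime::Buffer<float> im(640, 480);
 * for (int y : im.dim(1)) {
 *     for (int x : im.dim(0)) {
 *         im(x, y) = 0.0f;  // x spans [im.dim(0).min(), im.dim(0).max()]
 *     }
 * }
 * \endcode
 */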
647
648 /** The total number of elements this buffer represents. Equal to
649 * the product of the extents */
650 size_t number_of_elements() const {
651 return buf.number_of_elements();
652 }
653
654 /** Get the dimensionality of the buffer. */
655 int dimensions() const {
656 if constexpr (has_static_dimensions) {
657 return Dims;
658 } else {
659 return buf.dimensions;
660 }
661 }
662
663 /** Get the type of the elements. */
664 halide_type_t type() const {
665 return buf.type;
666 }
667
668 /** A pointer to the element with the lowest address. If all
669 * strides are positive, equal to the host pointer. */
670 T *begin() const {
671 assert(buf.host != nullptr); // Cannot call begin() on an unallocated Buffer.
672 return (T *)buf.begin();
673 }
674
675 /** A pointer to one beyond the element with the highest address. */
676 T *end() const {
677 assert(buf.host != nullptr); // Cannot call end() on an unallocated Buffer.
678 return (T *)buf.end();
679 }
680
681 /** The total number of bytes spanned by the data in memory. */
682 size_t size_in_bytes() const {
683 return buf.size_in_bytes();
684 }
685
686 /** Reset the Buffer to be equivalent to a default-constructed Buffer
687 * of the same static type (if any); Buffer<void> will have its runtime
688 * type reset to uint8. */
689 void reset() {
690 *this = Buffer();
691 }
692
693 Buffer()
694 : shape() {
695 buf.type = static_halide_type();
696 // If Dims are statically known, must create storage that many.
697 // otherwise, make a zero-dimensional buffer.
698 constexpr int buf_dimensions = (Dims == AnyDims) ? 0 : Dims;
699 make_static_shape_storage<buf_dimensions>();
700 }
701
702 /** Make a Buffer from a halide_buffer_t */
703 explicit Buffer(const halide_buffer_t &buf,
704 BufferDeviceOwnership ownership = BufferDeviceOwnership::Unmanaged) {
705 assert(T_is_void || buf.type == static_halide_type());
706 initialize_from_buffer(buf, ownership);
707 }
708
709 /** Give Buffers access to the members of Buffers of different dimensionalities and types. */
710 template<typename T2, int D2, int S2>
711 friend class Buffer;
712
713private:
714 template<typename T2, int D2, int S2>
715 static void static_assert_can_convert_from() {
716 static_assert((!std::is_const<T2>::value || std::is_const<T>::value),
717 "Can't convert from a Buffer<const T> to a Buffer<T>");
718 static_assert(std::is_same<typename std::remove_const<T>::type,
719 typename std::remove_const<T2>::type>::value ||
721 "type mismatch constructing Buffer");
722 static_assert(Dims == AnyDims || D2 == AnyDims || Dims == D2,
723 "Can't convert from a Buffer with static dimensionality to a Buffer with different static dimensionality");
724 }
725
726public:
727 static void set_default_allocate_fn(void *(*allocate_fn)(size_t)) {
728 Internal::DefaultAllocatorFns::default_allocate_fn = allocate_fn;
729 }
730 static void set_default_deallocate_fn(void (*deallocate_fn)(void *)) {
731 Internal::DefaultAllocatorFns::default_deallocate_fn = deallocate_fn;
732 }
733
734 /** Determine if a Buffer<T, Dims, InClassDimStorage> can be constructed from some other Buffer type.
735 * If this can be determined at compile time, fail with a static assert; otherwise
736 * return a boolean based on runtime typing. */
737 template<typename T2, int D2, int S2>
738 static bool can_convert_from(const Buffer<T2, D2, S2> &other) {
739 static_assert_can_convert_from<T2, D2, S2>();
740 if (Buffer<T2, D2, S2>::T_is_void && !T_is_void) {
741 if (other.type() != static_halide_type()) {
742 return false;
743 }
744 }
745 if (Dims != AnyDims) {
746 if (other.dimensions() != Dims) {
747 return false;
748 }
749 }
750 return true;
751 }
752
753 /** Fail an assertion at runtime or compile-time if a Buffer<T, Dims, InClassDimStorage>
754 * cannot be constructed from some other Buffer type. */
755 template<typename T2, int D2, int S2>
756 static void assert_can_convert_from(const Buffer<T2, D2, S2> &other) {
757 // Explicitly call static_assert_can_convert_from() here so
758 // that we always get compile-time checking, even if compiling with
759 // assertions disabled.
760 static_assert_can_convert_from<T2, D2, S2>();
761 assert(can_convert_from(other));
762 }
763
764 /** Copy constructor. Does not copy underlying data. */
765 Buffer(const Buffer<T, Dims, InClassDimStorage> &other)
766 : buf(other.buf),
767 alloc(other.alloc) {
768 other.incref();
769 dev_ref_count = other.dev_ref_count;
770 copy_shape_from(other.buf);
771 }
772
773 /** Construct a Buffer from a Buffer of different dimensionality
774 * and type. Asserts that the type and dimensionality matches (at runtime,
775 * if one of the types is void). Note that this constructor is
776 * implicit. This, for example, lets you pass things like
777 * Buffer<T> or Buffer<const void> to functions expecting
778 * Buffer<const T>. */
779 template<typename T2, int D2, int S2>
780 Buffer(const Buffer<T2, D2, S2> &other)
781 : buf(other.buf),
782 alloc(other.alloc) {
783 assert_can_convert_from(other);
784 other.incref();
785 dev_ref_count = other.dev_ref_count;
786 copy_shape_from(other.buf);
787 }
788
789 /** Move constructor */
790 Buffer(Buffer<T, Dims, InClassDimStorage> &&other) noexcept
791 : buf(other.buf),
792 alloc(other.alloc),
793 dev_ref_count(other.dev_ref_count) {
794 other.dev_ref_count = nullptr;
795 other.alloc = nullptr;
796 move_shape_from(std::forward<Buffer<T, Dims, InClassDimStorage>>(other));
797 }
798
799 /** Move-construct a Buffer from a Buffer of different
800 * dimensionality and type. Asserts that the types match (at
801 * runtime if one of the types is void). */
802 template<typename T2, int D2, int S2>
803 Buffer(Buffer<T2, D2, S2> &&other)
804 : buf(other.buf),
805 alloc(other.alloc),
806 dev_ref_count(other.dev_ref_count) {
807 assert_can_convert_from(other);
808 other.dev_ref_count = nullptr;
809 other.alloc = nullptr;
810 move_shape_from(std::forward<Buffer<T2, D2, S2>>(other));
811 }
812
813 /** Assign from another Buffer of possibly-different
814 * dimensionality and type. Asserts that the types match (at
815 * runtime if one of the types is void). */
816 template<typename T2, int D2, int S2>
817 Buffer<T, Dims, InClassDimStorage> &operator=(const Buffer<T2, D2, S2> &other) {
818 if ((const void *)this == (const void *)&other) {
819 return *this;
820 }
821 assert_can_convert_from(other);
822 other.incref();
823 decref();
824 dev_ref_count = other.dev_ref_count;
825 alloc = other.alloc;
826 free_shape_storage();
827 buf = other.buf;
828 copy_shape_from(other.buf);
829 return *this;
830 }
831
832 /** Standard assignment operator */
833 Buffer<T, Dims, InClassDimStorage> &operator=(const Buffer<T, Dims, InClassDimStorage> &other) {
834 // The cast to void* here is just to satisfy clang-tidy
835 if ((const void *)this == (const void *)&other) {
836 return *this;
837 }
838 other.incref();
839 decref();
840 dev_ref_count = other.dev_ref_count;
841 alloc = other.alloc;
842 free_shape_storage();
843 buf = other.buf;
844 copy_shape_from(other.buf);
845 return *this;
846 }
847
848 /** Move from another Buffer of possibly-different
849 * dimensionality and type. Asserts that the types match (at
850 * runtime if one of the types is void). */
851 template<typename T2, int D2, int S2>
852 Buffer<T, Dims, InClassDimStorage> &operator=(Buffer<T2, D2, S2> &&other) {
853 assert_can_convert_from(other);
854 decref();
855 alloc = other.alloc;
856 other.alloc = nullptr;
857 dev_ref_count = other.dev_ref_count;
858 other.dev_ref_count = nullptr;
859 free_shape_storage();
860 buf = other.buf;
861 move_shape_from(std::forward<Buffer<T2, D2, S2>>(other));
862 return *this;
863 }
864
865 /** Standard move-assignment operator */
866 Buffer<T, Dims, InClassDimStorage> &operator=(Buffer<T, Dims, InClassDimStorage> &&other) noexcept {
867 decref();
868 alloc = other.alloc;
869 other.alloc = nullptr;
870 dev_ref_count = other.dev_ref_count;
871 other.dev_ref_count = nullptr;
872 free_shape_storage();
873 buf = other.buf;
874 move_shape_from(std::forward<Buffer<T, Dims, InClassDimStorage>>(other));
875 return *this;
876 }
877
878 /** Check that the product of the extents fits in memory. */
879 void check_overflow() {
880 size_t size = type().bytes();
881 for (int i = 0; i < dimensions(); i++) {
882 size *= dim(i).extent();
883 }
884 // We allow 2^31 or 2^63 bytes, so drop the top bit.
885 size = (size << 1) >> 1;
886 for (int i = 0; i < dimensions(); i++) {
887 size /= dim(i).extent();
888 }
889 assert(size == (size_t)type().bytes() && "Error: Overflow computing total size of buffer.");
890 }
891
892 /** Allocate memory for this Buffer. Drops the reference to any
893 * owned memory. */
894 void allocate(void *(*allocate_fn)(size_t) = nullptr,
895 void (*deallocate_fn)(void *) = nullptr) {
896 // Drop any existing allocation
897 deallocate();
898
899 // Conservatively align images to (usually) 128 bytes. This is enough
900 // alignment for all the platforms we might use. Also ensure that the allocation
901 // is such that the logical size is an integral multiple of 128 bytes (or a bit more).
902 constexpr size_t alignment = HALIDE_RUNTIME_BUFFER_ALLOCATION_ALIGNMENT;
903
904 const auto align_up = [=](size_t value) -> size_t {
905 return (value + alignment - 1) & ~(alignment - 1);
906 };
907
908 size_t size = size_in_bytes();
909
910#if HALIDE_RUNTIME_BUFFER_USE_ALIGNED_ALLOC
911 // Only use aligned_alloc() if no custom allocators are specified.
912 if (!allocate_fn && !deallocate_fn) {
913 // As a practical matter, sizeof(AllocationHeader) is going to be no more than 16 bytes
914 // on any supported platform, so we will just overallocate by 'alignment'
915 // so that the user storage also starts at an aligned point. This is a bit
916 // wasteful, but probably not a big deal.
917 static_assert(sizeof(AllocationHeader) <= alignment);
918 void *alloc_storage = ::aligned_alloc(alignment, align_up(size) + alignment);
919 assert((uintptr_t)alloc_storage == align_up((uintptr_t)alloc_storage));
920 alloc = new (alloc_storage) AllocationHeader(free);
921 buf.host = (uint8_t *)((uintptr_t)alloc_storage + alignment);
922 return;
923 }
924 // else fall thru
925#endif
926 if (!allocate_fn) {
927 allocate_fn = Internal::DefaultAllocatorFns::default_allocate_fn;
928 if (!allocate_fn) {
929 allocate_fn = malloc;
930 }
931 }
932 if (!deallocate_fn) {
933 deallocate_fn = Internal::DefaultAllocatorFns::default_deallocate_fn;
934 if (!deallocate_fn) {
935 deallocate_fn = free;
936 }
937 }
938
939 static_assert(sizeof(AllocationHeader) <= alignment);
940
941 // malloc() and friends must return a pointer aligned to at least alignof(std::max_align_t);
942 // make sure this is OK for AllocationHeader, since it always goes at the start
943 static_assert(alignof(AllocationHeader) <= alignof(std::max_align_t));
944
945 const size_t requested_size = align_up(size + alignment +
946 std::max(0, (int)sizeof(AllocationHeader) -
947 (int)sizeof(std::max_align_t)));
948 void *alloc_storage = allocate_fn(requested_size);
949 alloc = new (alloc_storage) AllocationHeader(deallocate_fn);
950 uint8_t *unaligned_ptr = ((uint8_t *)alloc) + sizeof(AllocationHeader);
951 buf.host = (uint8_t *)align_up((uintptr_t)unaligned_ptr);
952 }
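 /* Illustrative sketch: describe a shape first, then allocate through a custom
 * allocator. my_aligned_malloc and my_aligned_free are hypothetical stand-ins
 * with malloc/free-style signatures.
 * \code
 * Halide::Runtime::Buffer<float> im(nullptr, 1024, 1024);  // shape only, no storage yet
 * im.allocate(my_aligned_malloc, my_aligned_free);
 * \endcode
 */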
953
954 /** Drop reference to any owned host or device memory, possibly
955 * freeing it, if this buffer held the last reference to
956 * it. Retains the shape of the buffer. Does nothing if this
957 * buffer did not allocate its own memory. */
958 void deallocate() {
959 decref();
960 }
961
962 /** Drop reference to any owned device memory, possibly freeing it
963 * if this buffer held the last reference to it. Asserts that
964 * device_dirty is false. */
965 void device_deallocate() {
966 decref(true);
967 }
968
969 /** Allocate a new image of the given size with a runtime
970 * type. Only used when you do know what size you want but you
971 * don't know statically what type the elements are. Pass zeroes
972 * to make a buffer suitable for bounds query calls. */
973 template<typename... Args,
974 typename = typename std::enable_if<AllInts<Args...>::value>::type>
975 Buffer(halide_type_t t, int first, Args... rest) {
976 if (!T_is_void) {
977 assert(static_halide_type() == t);
978 }
979 int extents[] = {first, (int)rest...};
980 buf.type = t;
981 constexpr int buf_dimensions = 1 + (int)(sizeof...(rest));
982 make_static_shape_storage<buf_dimensions>();
983 initialize_shape(extents);
984 if (!Internal::any_zero(extents)) {
985 check_overflow();
986 allocate();
987 }
988 }
989
990 /** Allocate a new image of the given size. Pass zeroes to make a
991 * buffer suitable for bounds query calls. */
992 // @{
993
994 // The overload with one argument is 'explicit', so that
995 // (say) int is not implicitly convertible to Buffer<int>
996 explicit Buffer(int first) {
997 static_assert(!T_is_void,
998 "To construct an Buffer<void>, pass a halide_type_t as the first argument to the constructor");
999 int extents[] = {first};
1000 buf.type = static_halide_type();
1001 constexpr int buf_dimensions = 1;
1002 make_static_shape_storage<buf_dimensions>();
1003 initialize_shape(extents);
1004 if (first != 0) {
1005 check_overflow();
1006 allocate();
1007 }
1008 }
1009
1010 template<typename... Args,
1011 typename = typename std::enable_if<AllInts<Args...>::value>::type>
1012 Buffer(int first, int second, Args... rest) {
1013 static_assert(!T_is_void,
1014 "To construct an Buffer<void>, pass a halide_type_t as the first argument to the constructor");
1015 int extents[] = {first, second, (int)rest...};
1016 buf.type = static_halide_type();
1017 constexpr int buf_dimensions = 2 + (int)(sizeof...(rest));
1018 make_static_shape_storage<buf_dimensions>();
1019 initialize_shape(extents);
1020 if (!Internal::any_zero(extents)) {
1021 check_overflow();
1022 allocate();
1023 }
1024 }
1025 // @}
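 /* Illustrative sketch of the allocating constructors (assumes this header is included):
 * \code
 * Halide::Runtime::Buffer<uint8_t> rgb(640, 480, 3);            // owns a 640x480x3 allocation
 * Halide::Runtime::Buffer<> dyn(halide_type_of<float>(), 8, 8); // runtime-typed (T = void)
 * Halide::Runtime::Buffer<int32_t> probe(0, 0);                 // zero-sized, for bounds queries
 * \endcode
 */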
1026
1027 /** Allocate a new image of unknown type using a vector of ints as the size. */
1028 Buffer(halide_type_t t, const std::vector<int> &sizes) {
1029 if (!T_is_void) {
1030 assert(static_halide_type() == t);
1031 }
1032 buf.type = t;
1033 // make_shape_storage() will do a runtime check that dimensionality matches.
1034 make_shape_storage((int)sizes.size());
1035 initialize_shape(sizes);
1036 if (!Internal::any_zero(sizes)) {
1037 check_overflow();
1038 allocate();
1039 }
1040 }
1041
1042 /** Allocate a new image of known type using a vector of ints as the size. */
1043 explicit Buffer(const std::vector<int> &sizes)
1044 : Buffer(static_halide_type(), sizes) {
1045 }
1046
1047private:
1048 // Create a copy of the sizes vector, ordered as specified by order.
1049 static std::vector<int> make_ordered_sizes(const std::vector<int> &sizes, const std::vector<int> &order) {
1050 assert(order.size() == sizes.size());
1051 std::vector<int> ordered_sizes(sizes.size());
1052 for (size_t i = 0; i < sizes.size(); ++i) {
1053 ordered_sizes[i] = sizes.at(order[i]);
1054 }
1055 return ordered_sizes;
1056 }
1057
1058public:
1059 /** Allocate a new image of unknown type using a vector of ints as the size and
1060 * a vector of indices indicating the storage order for each dimension. The
1061 * length of the sizes vector and the storage-order vector must match. For instance,
1062 * to allocate an interleaved RGB buffer, you would pass {2, 0, 1} for storage_order. */
1063 Buffer(halide_type_t t, const std::vector<int> &sizes, const std::vector<int> &storage_order)
1064 : Buffer(t, make_ordered_sizes(sizes, storage_order)) {
1065 transpose(storage_order);
1066 }
1067
1068 Buffer(const std::vector<int> &sizes, const std::vector<int> &storage_order)
1069 : Buffer(static_halide_type(), sizes, storage_order) {
1070 }
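 /* Illustrative sketch: an interleaved RGB allocation via storage_order, where
 * the channel dimension ends up with stride 1.
 * \code
 * Halide::Runtime::Buffer<uint8_t> im({640, 480, 3}, {2, 0, 1});
 * assert(im.dim(2).stride() == 1 && im.dim(0).stride() == 3);
 * \endcode
 */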
1071
1072 /** Make a Buffer that refers to a statically sized array. Does not
1073 * take ownership of the data, and does not set the host_dirty flag. */
1074 template<typename Array, size_t N>
1075 explicit Buffer(Array (&vals)[N]) {
1076 const int buf_dimensions = dimensionality_of_array(vals);
1077 buf.type = scalar_type_of_array(vals);
1078 buf.host = (uint8_t *)vals;
1079 make_shape_storage(buf_dimensions);
1080 initialize_shape_from_array_shape(buf.dimensions - 1, vals);
1081 }
1082
1083 /** Initialize a Buffer of runtime type from a pointer and some
1084 * sizes. Assumes dense row-major packing and a min coordinate of
1085 * zero. Does not take ownership of the data and does not set the
1086 * host_dirty flag. */
1087 template<typename... Args,
1088 typename = typename std::enable_if<AllInts<Args...>::value>::type>
1089 explicit Buffer(halide_type_t t, add_const_if_T_is_const<void> *data, int first, Args &&...rest) {
1090 if (!T_is_void) {
1091 assert(static_halide_type() == t);
1092 }
1093 int extents[] = {first, (int)rest...};
1094 buf.type = t;
1095 buf.host = (uint8_t *)const_cast<void *>(data);
1096 constexpr int buf_dimensions = 1 + (int)(sizeof...(rest));
1097 make_static_shape_storage<buf_dimensions>();
1098 initialize_shape(extents);
1099 }
1100
1101 /** Initialize a Buffer from a pointer and some sizes. Assumes
1102 * dense row-major packing and a min coordinate of zero. Does not
1103 * take ownership of the data and does not set the host_dirty flag. */
1104 template<typename... Args,
1105 typename = typename std::enable_if<AllInts<Args...>::value>::type>
1106 explicit Buffer(T *data, int first, Args &&...rest) {
1107 int extents[] = {first, (int)rest...};
1108 buf.type = static_halide_type();
1109 buf.host = (uint8_t *)const_cast<typename std::remove_const<T>::type *>(data);
1110 constexpr int buf_dimensions = 1 + (int)(sizeof...(rest));
1111 make_static_shape_storage<buf_dimensions>();
1112 initialize_shape(extents);
1113 }
1114
1115 /** Initialize a Buffer from a pointer and a vector of
1116 * sizes. Assumes dense row-major packing and a min coordinate of
1117 * zero. Does not take ownership of the data and does not set the
1118 * host_dirty flag. */
1119 explicit Buffer(T *data, const std::vector<int> &sizes) {
1120 buf.type = static_halide_type();
1121 buf.host = (uint8_t *)const_cast<typename std::remove_const<T>::type *>(data);
1122 make_shape_storage((int)sizes.size());
1123 initialize_shape(sizes);
1124 }
1125
1126 /** Initialize a Buffer of runtime type from a pointer and a
1127 * vector of sizes. Assumes dense row-major packing and a min
1128 * coordinate of zero. Does not take ownership of the data and
1129 * does not set the host_dirty flag. */
1130 explicit Buffer(halide_type_t t, add_const_if_T_is_const<void> *data, const std::vector<int> &sizes) {
1131 if (!T_is_void) {
1132 assert(static_halide_type() == t);
1133 }
1134 buf.type = t;
1135 buf.host = (uint8_t *)const_cast<void *>(data);
1136 make_shape_storage((int)sizes.size());
1137 initialize_shape(sizes);
1138 }
1139
1140 /** Initialize a Buffer from a pointer to the min coordinate and
1141 * an array describing the shape. Does not take ownership of the
1142 * data, and does not set the host_dirty flag. */
1143 explicit Buffer(halide_type_t t, add_const_if_T_is_const<void> *data, int d, const halide_dimension_t *shape) {
1144 if (!T_is_void) {
1145 assert(static_halide_type() == t);
1146 }
1147 buf.type = t;
1148 buf.host = (uint8_t *)const_cast<void *>(data);
1149 make_shape_storage(d);
1150 for (int i = 0; i < d; i++) {
1151 buf.dim[i] = shape[i];
1152 }
1153 }
1154
1155 /** Initialize a Buffer from a pointer to the min coordinate and
1156 * a vector describing the shape. Does not take ownership of the
1157 * data, and does not set the host_dirty flag. */
1158 explicit Buffer(halide_type_t t, add_const_if_T_is_const<void> *data,
1159 const std::vector<halide_dimension_t> &shape)
1160 : Buffer(t, data, (int)shape.size(), shape.data()) {
1161 }
1162
1163 /** Initialize a Buffer from a pointer to the min coordinate and
1164 * an array describing the shape. Does not take ownership of the
1165 * data and does not set the host_dirty flag. */
1166 explicit Buffer(T *data, int d, const halide_dimension_t *shape) {
1167 buf.type = static_halide_type();
1168 buf.host = (uint8_t *)const_cast<typename std::remove_const<T>::type *>(data);
1169 make_shape_storage(d);
1170 for (int i = 0; i < d; i++) {
1171 buf.dim[i] = shape[i];
1172 }
1173 }
1174
1175 /** Initialize a Buffer from a pointer to the min coordinate and
1176 * a vector describing the shape. Does not take ownership of the
1177 * data, and does not set the host_dirty flag. */
1178 explicit Buffer(T *data, const std::vector<halide_dimension_t> &shape)
1179 : Buffer(data, (int)shape.size(), shape.data()) {
1180 }
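 /* Illustrative sketch: wrapping existing memory (no ownership is taken).
 * \code
 * float data[480 * 640];
 * Halide::Runtime::Buffer<float> dense(data, 640, 480);   // dense row-major, min = 0
 *
 * halide_dimension_t shape[2] = {{0, 640, 1}, {0, 480, 640}};
 * Halide::Runtime::Buffer<float> same(data, 2, shape);    // explicit {min, extent, stride}
 * \endcode
 */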
1181
1182 /** Destructor. Will release any underlying owned allocation if
1183 * this is the last reference to it. Will assert fail if there are
1184 * weak references to this Buffer outstanding. */
1185 ~Buffer() {
1186 decref();
1187 free_shape_storage();
1188 }
1189
1190 /** Get a pointer to the raw halide_buffer_t this wraps. */
1191 // @{
1192 halide_buffer_t *raw_buffer() {
1193 return &buf;
1194 }
1195
1196 const halide_buffer_t *raw_buffer() const {
1197 return &buf;
1198 }
1199 // @}
1200
1201 /** Provide a cast operator to halide_buffer_t *, so that
1202 * instances can be passed directly to Halide filters. */
1203 operator halide_buffer_t *() {
1204 return &buf;
1205 }
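 /* Illustrative sketch: Buffers pass straight to an AOT-compiled pipeline that
 * takes halide_buffer_t *. (my_pipeline is a hypothetical generated function.)
 * \code
 * Halide::Runtime::Buffer<uint8_t> in(640, 480), out(640, 480);
 * int error = my_pipeline(in, out);  // each Buffer converts implicitly
 * \endcode
 */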
1206
1207 /** Return a typed reference to this Buffer. Useful for converting
1208 * a reference to a Buffer<void> to a reference to, for example, a
1209 * Buffer<const uint8_t>, or converting a Buffer<T>& to Buffer<const T>&.
1210 * You can also optionally specify a new value for Dims; this is useful
1211 * mainly for removing the dimensionality constraint on a Buffer with
1212 * explicit dimensionality. Does a runtime assert if the source buffer type
1213 * is void or the new dimensionality is incompatible. */
1214 template<typename T2, int D2 = Dims>
1215 HALIDE_ALWAYS_INLINE Buffer<T2, D2, InClassDimStorage> &as() & {
1216 Buffer<T2, D2, InClassDimStorage>::assert_can_convert_from(*this);
1217 return *((Buffer<T2, D2, InClassDimStorage> *)this);
1218 }
1219
1220 /** Return a const typed reference to this Buffer. Useful for converting
1221 * a reference to a Buffer<void> to a reference to, for example, a
1222 * Buffer<const uint8_t>, or converting a Buffer<T>& to Buffer<const T>&.
1223 * You can also optionally specify a new value for Dims; this is useful
1224 * mainly for removing the dimensionality constraint on a Buffer with
1225 * explicit dimensionality. Does a runtime assert if the source buffer type
1226 * is void or the new dimensionality is incompatible. */
1227 template<typename T2, int D2 = Dims>
1228 HALIDE_ALWAYS_INLINE const Buffer<T2, D2, InClassDimStorage> &as() const & {
1229 Buffer<T2, D2, InClassDimStorage>::assert_can_convert_from(*this);
1230 return *((const Buffer<T2, D2, InClassDimStorage> *)this);
1231 }
1232
1233 /** Return an rval reference to this Buffer. Useful for converting
1234 * a reference to a Buffer<void> to a reference to, for example, a
1235 * Buffer<const uint8_t>, or converting a Buffer<T>& to Buffer<const T>&.
1236 * You can also optionally specify a new value for Dims; this is useful
1237 * mainly for removing the dimensionality constraint on a Buffer with
1238 * explicit dimensionality. Does a runtime assert if the source buffer type
1239 * is void or the new dimensionality is incompatible. */
1240 template<typename T2, int D2 = Dims>
1241 HALIDE_ALWAYS_INLINE Buffer<T2, D2, InClassDimStorage> as() && {
1242 Buffer<T2, D2, InClassDimStorage>::assert_can_convert_from(*this);
1243 return *((Buffer<T2, D2, InClassDimStorage> *)this);
1244 }
1245
1246 /** as_const() is syntactic sugar for .as<const T>(), to avoid the need
1247 * to recapitulate the type argument. */
1248 // @{
1249 HALIDE_ALWAYS_INLINE
1250 Buffer<typename std::add_const<T>::type, Dims, InClassDimStorage> &as_const() & {
1251 // Note that we can skip the assert_can_convert_from(), since T -> const T
1252 // conversion is always legal.
1253 return *((Buffer<typename std::add_const<T>::type, Dims, InClassDimStorage> *)this);
1254 }
1255
1256 HALIDE_ALWAYS_INLINE
1257 const Buffer<typename std::add_const<T>::type, Dims, InClassDimStorage> &as_const() const & {
1258 return *((const Buffer<typename std::add_const<T>::type, Dims, InClassDimStorage> *)this);
1259 }
1260
1261 HALIDE_ALWAYS_INLINE
1262 Buffer<typename std::add_const<T>::type, Dims, InClassDimStorage> as_const() && {
1263 return *((Buffer<typename std::add_const<T>::type, Dims, InClassDimStorage> *)this);
1264 }
1265 // @}
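 /* Illustrative sketch: reinterpreting the static type without copying.
 * \code
 * Halide::Runtime::Buffer<float> img(8, 8);
 * Halide::Runtime::Buffer<const float> &cimg = img.as_const();
 * Halide::Runtime::Buffer<const void> &erased = img.as<const void>();
 * \endcode
 */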
1266
1267 /** Add some syntactic sugar to allow autoconversion from Buffer<T> to Buffer<const T>& when
1268 * passing arguments */
1269 template<typename T2 = T, typename = typename std::enable_if<!std::is_const<T2>::value>::type>
1270 operator Buffer<typename std::add_const<T2>::type, Dims, InClassDimStorage> &() & {
1271 return as_const();
1272 }
1273
1274 /** Add some syntactic sugar to allow autoconversion from Buffer<T> to Buffer<void>& when
1275 * passing arguments */
1276 template<typename TVoid,
1277 typename T2 = T,
1278 typename = typename std::enable_if<std::is_same<TVoid, void>::value &&
1279 !std::is_void<T2>::value &&
1280 !std::is_const<T2>::value>::type>
1281 operator Buffer<TVoid, Dims, InClassDimStorage> &() & {
1282 return as<TVoid, Dims>();
1283 }
1284
1285 /** Add some syntactic sugar to allow autoconversion from Buffer<const T> to Buffer<const void>& when
1286 * passing arguments */
1287 template<typename TVoid,
1288 typename T2 = T,
1289 typename = typename std::enable_if<std::is_same<TVoid, void>::value &&
1290 !std::is_void<T2>::value &&
1291 std::is_const<T2>::value>::type>
1295
1296 /** Conventional names for the first three dimensions. */
1297 // @{
1298 int width() const {
1299 return (dimensions() > 0) ? dim(0).extent() : 1;
1300 }
1301 int height() const {
1302 return (dimensions() > 1) ? dim(1).extent() : 1;
1303 }
1304 int channels() const {
1305 return (dimensions() > 2) ? dim(2).extent() : 1;
1306 }
1307 // @}
1308
1309 /** Conventional names for the min and max value of each dimension */
1310 // @{
1311 int left() const {
1312 return dim(0).min();
1313 }
1314
1315 int right() const {
1316 return dim(0).max();
1317 }
1318
1319 int top() const {
1320 return dim(1).min();
1321 }
1322
1323 int bottom() const {
1324 return dim(1).max();
1325 }
1326 // @}
1327
1328 /** Make a new image which is a deep copy of this image. Use crop
1329 * or slice followed by copy to make a copy of only a portion of
1330 * the image. The new image uses the same memory layout as the
1331 * original, with holes compacted away. Note that the returned
1332 * Buffer is always of a non-const type T (ie:
1333 *
1334 * Buffer<const T>.copy() -> Buffer<T> rather than Buffer<const T>
1335 *
1336 * which is always safe, since we are making a deep copy. (The caller
1337 * can easily cast it back to Buffer<const T> if desired, which is
1338 * always safe and free.)
1339 */
1340 Buffer<not_const_T, Dims, InClassDimStorage> copy(void *(*allocate_fn)(size_t) = nullptr,
1341 void (*deallocate_fn)(void *) = nullptr) const {
1342 Buffer<not_const_T, Dims, InClassDimStorage> dst = Buffer<not_const_T, Dims, InClassDimStorage>::make_with_shape_of(*this, allocate_fn, deallocate_fn);
1343 dst.copy_from(*this);
1344 return dst;
1345 }
1346
1347 /** Like copy(), but the copy is created in interleaved memory layout
1348 * (vs. keeping the same memory layout as the original). Requires that 'this'
1349 * has exactly 3 dimensions.
1350 */
1351 Buffer<not_const_T, Dims, InClassDimStorage> copy_to_interleaved(void *(*allocate_fn)(size_t) = nullptr,
1352 void (*deallocate_fn)(void *) = nullptr) const {
1353 static_assert(Dims == AnyDims || Dims == 3);
1354 assert(dimensions() == 3);
1355 Buffer<not_const_T, Dims, InClassDimStorage> dst = Buffer<not_const_T, Dims, InClassDimStorage>::make_interleaved(nullptr, width(), height(), channels());
1356 dst.set_min(min(0), min(1), min(2));
1357 dst.allocate(allocate_fn, deallocate_fn);
1358 dst.copy_from(*this);
1359 return dst;
1360 }
1361
1362 /** Like copy(), but the copy is created in planar memory layout
1363 * (vs. keeping the same memory layout as the original).
1364 */
1365 Buffer<not_const_T, Dims, InClassDimStorage> copy_to_planar(void *(*allocate_fn)(size_t) = nullptr,
1366 void (*deallocate_fn)(void *) = nullptr) const {
1367 std::vector<int> mins, extents;
1368 const int dims = dimensions();
1369 mins.reserve(dims);
1370 extents.reserve(dims);
1371 for (int d = 0; d < dims; ++d) {
1372 mins.push_back(dim(d).min());
1373 extents.push_back(dim(d).extent());
1374 }
1375 Buffer<not_const_T, Dims, InClassDimStorage> dst(nullptr, extents);
1376 dst.set_min(mins);
1377 dst.allocate(allocate_fn, deallocate_fn);
1378 dst.copy_from(*this);
1379 return dst;
1380 }
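 /* Illustrative sketch: copy() compacts whatever region it is called on.
 * \code
 * Halide::Runtime::Buffer<uint8_t> src(640, 480);
 * Halide::Runtime::Buffer<uint8_t> whole = src.copy();
 * Halide::Runtime::Buffer<uint8_t> tile = src.cropped(0, 16, 32).copy();  // deep copy of a sub-range
 * \endcode
 */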
1381
1382 /** Make a copy of the Buffer which shares the underlying host and/or device
1383 * allocations as the existing Buffer. This is purely syntactic sugar for
1384 * cases where you have a const reference to a Buffer but need a temporary
1385 * non-const copy (e.g. to make a call into AOT-generated Halide code), and want a terse
1386 * inline way to create a temporary. \code
1387 * void call_my_func(const Buffer<const uint8_t>& input) {
1388 * my_func(input.alias(), output);
1389 * }\endcode
1390 */
1391 Buffer<T, Dims, InClassDimStorage> alias() const {
1392 return *this;
1393 }
1394
1395 /** Fill a Buffer with the values at the same coordinates in
1396 * another Buffer. Restricts itself to coordinates contained
1397 * within the intersection of the two buffers. If the two Buffers
1398 * are not in the same coordinate system, you will need to
1399 * translate the argument Buffer first. E.g. if you're blitting a
1400 * sprite onto a framebuffer, you'll want to translate the sprite
1401 * to the correct location first like so: \code
1402 * framebuffer.copy_from(sprite.translated({x, y})); \endcode
1403 */
1404 template<typename T2, int D2, int S2>
1405 void copy_from(Buffer<T2, D2, S2> src) {
1406 static_assert(!std::is_const<T>::value, "Cannot call copy_from() on a Buffer<const T>");
1407 assert(!device_dirty() && "Cannot call Halide::Runtime::Buffer::copy_from on a device dirty destination.");
1408 assert(!src.device_dirty() && "Cannot call Halide::Runtime::Buffer::copy_from on a device dirty source.");
1409
1410 Buffer<T, Dims, InClassDimStorage> dst(*this); // alias
1411
1412 static_assert(Dims == AnyDims || D2 == AnyDims || Dims == D2);
1413 assert(src.dimensions() == dst.dimensions());
1414
1415 // Trim the copy to the region in common
1416 const int d = dimensions();
1417 for (int i = 0; i < d; i++) {
1418 int min_coord = std::max(dst.dim(i).min(), src.dim(i).min());
1419 int max_coord = std::min(dst.dim(i).max(), src.dim(i).max());
1420 if (max_coord < min_coord) {
1421 // The buffers do not overlap.
1422 return;
1423 }
1424 dst.crop(i, min_coord, max_coord - min_coord + 1);
1425 src.crop(i, min_coord, max_coord - min_coord + 1);
1426 }
1427
1428 // If T is void, we need to do runtime dispatch to an
1429 // appropriately-typed lambda. We're copying, so we only care
1430 // about the element size. (If not, this should optimize away
1431 // into a static dispatch to the right-sized copy.)
1432 if (T_is_void ? (type().bytes() == 1) : (sizeof(not_void_T) == 1)) {
1433 using MemType = uint8_t;
1434 auto &typed_dst = (Buffer<MemType, Dims, InClassDimStorage> &)dst;
1435 auto &typed_src = (Buffer<const MemType, D2, S2> &)src;
1436 typed_dst.for_each_value([&](MemType &dst, MemType src) { dst = src; }, typed_src);
1437 } else if (T_is_void ? (type().bytes() == 2) : (sizeof(not_void_T) == 2)) {
1438 using MemType = uint16_t;
1439 auto &typed_dst = (Buffer<MemType, Dims, InClassDimStorage> &)dst;
1440 auto &typed_src = (Buffer<const MemType, D2, S2> &)src;
1441 typed_dst.for_each_value([&](MemType &dst, MemType src) { dst = src; }, typed_src);
1442 } else if (T_is_void ? (type().bytes() == 4) : (sizeof(not_void_T) == 4)) {
1443 using MemType = uint32_t;
1444 auto &typed_dst = (Buffer<MemType, Dims, InClassDimStorage> &)dst;
1445 auto &typed_src = (Buffer<const MemType, D2, S2> &)src;
1446 typed_dst.for_each_value([&](MemType &dst, MemType src) { dst = src; }, typed_src);
1447 } else if (T_is_void ? (type().bytes() == 8) : (sizeof(not_void_T) == 8)) {
1448 using MemType = uint64_t;
1449 auto &typed_dst = (Buffer<MemType, Dims, InClassDimStorage> &)dst;
1450 auto &typed_src = (Buffer<const MemType, D2, S2> &)src;
1451 typed_dst.for_each_value([&](MemType &dst, MemType src) { dst = src; }, typed_src);
1452 } else {
1453 assert(false && "type().bytes() must be 1, 2, 4, or 8");
1454 }
1455 set_host_dirty();
1456 }
1457
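 /* Illustrative sketch: copy_from() writes only the overlap of the two buffers.
 * \code
 * Halide::Runtime::Buffer<uint32_t> framebuffer(1024, 768);
 * Halide::Runtime::Buffer<uint32_t> sprite(16, 16);
 * framebuffer.copy_from(sprite.translated({100, 200}));  // blit the sprite at (100, 200)
 * \endcode
 */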
1458 /** Make an image that refers to a sub-range of this image along
1459 * the given dimension. Asserts that the crop region is within
1460 * the existing bounds: you cannot "crop outwards", even if you know there
1461 * is valid Buffer storage (e.g. because you already cropped inwards). */
1462 Buffer<T, Dims, InClassDimStorage> cropped(int d, int min, int extent) const {
1463 // Make a fresh copy of the underlying buffer (but not a fresh
1464 // copy of the allocation, if there is one).
1465 Buffer<T, Dims, InClassDimStorage> im = *this;
1466
1467 // This guarantees the preexisting device ref is dropped if the
1468 // device_crop call fails and maintains the buffer in a consistent
1469 // state.
1470 im.device_deallocate();
1471
1472 im.crop_host(d, min, extent);
1473 if (buf.device_interface != nullptr) {
1474 complete_device_crop(im);
1475 }
1476 return im;
1477 }
1478
1479 /** Crop an image in-place along the given dimension. This does
1480 * not move any data around in memory - it just changes the min
1481 * and extent of the given dimension. */
1482 void crop(int d, int min, int extent) {
1483 // An optimization for non-device buffers. For the device case,
1484 // a temp buffer is required, so reuse the not-in-place version.
1485 // TODO(zalman|abadams): Are nop crops common enough to special
1486 // case the device part of the if to do nothing?
1487 if (buf.device_interface != nullptr) {
1488 *this = cropped(d, min, extent);
1489 } else {
1490 crop_host(d, min, extent);
1491 }
1492 }
1493
1494 /** Make an image that refers to a sub-rectangle of this image along
1495 * the first N dimensions. Asserts that the crop region is within
1496 * the existing bounds. The cropped image may drop any device handle
1497 * if the device_interface cannot accomplish the crop in-place. */
1498 Buffer<T, Dims, InClassDimStorage> cropped(const std::vector<std::pair<int, int>> &rect) const {
1499 // Make a fresh copy of the underlying buffer (but not a fresh
1500 // copy of the allocation, if there is one).
1501 Buffer<T, Dims, InClassDimStorage> im = *this;
1502
1503 // This guarantees the preexisting device ref is dropped if the
1504 // device_crop call fails and maintains the buffer in a consistent
1505 // state.
1506 im.device_deallocate();
1507
1508 im.crop_host(rect);
1509 if (buf.device_interface != nullptr) {
1510 complete_device_crop(im);
1511 }
1512 return im;
1513 }
1514
1515 /** Crop an image in-place along the first N dimensions. This does
1516 * not move any data around in memory, nor does it free memory. It
1517 * just rewrites the min/extent of each dimension to refer to a
1518 * subregion of the same allocation. */
1519 void crop(const std::vector<std::pair<int, int>> &rect) {
1520 // An optimization for non-device buffers. For the device case,
1521 // a temp buffer is required, so reuse the not-in-place version.
1522 // TODO(zalman|abadams): Are nop crops common enough to special
1523 // case the device part of the if to do nothing?
1524 if (buf.device_interface != nullptr) {
1525 *this = cropped(rect);
1526 } else {
1527 crop_host(rect);
1528 }
1529 }
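 /* Illustrative sketch: crops share the original allocation and only narrow
 * the per-dimension min/extent metadata.
 * \code
 * Halide::Runtime::Buffer<float> im(256, 256);
 * Halide::Runtime::Buffer<float> roi = im.cropped({{64, 128}, {32, 64}});
 * roi(64, 32) = 1.0f;  // writes into im's storage
 * \endcode
 */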
1530
1531 /** Make an image which refers to the same data using
1532 * translated coordinates in the given dimension. Positive values
1533 * move the image data to the right or down relative to the
1534 * coordinate system. Drops any device handle. */
1535 Buffer<T, Dims, InClassDimStorage> translated(int d, int dx) const {
1536 Buffer<T, Dims, InClassDimStorage> im = *this;
1537 im.translate(d, dx);
1538 return im;
1539 }
1540
1541 /** Translate an image in-place along one dimension by changing
1542 * how it is indexed. Does not move any data around in memory. */
1543 void translate(int d, int delta) {
1544 assert(d >= 0 && d < this->dimensions());
1545 device_deallocate();
1546 buf.dim[d].min += delta;
1547 }
1548
1549 /** Make an image which refers to the same data translated along
1550 * the first N dimensions. */
1551 Buffer<T, Dims, InClassDimStorage> translated(const std::vector<int> &delta) const {
1552 Buffer<T, Dims, InClassDimStorage> im = *this;
1553 im.translate(delta);
1554 return im;
1555 }
1556
1557 /** Translate an image along the first N dimensions by changing
1558 * how it is indexed. Does not move any data around in memory. */
1559 void translate(const std::vector<int> &delta) {
1560 device_deallocate();
1561 assert(delta.size() <= static_cast<decltype(delta.size())>(std::numeric_limits<int>::max()));
1562 int limit = (int)delta.size();
1563 assert(limit <= dimensions());
1564 for (int i = 0; i < limit; i++) {
1565 translate(i, delta[i]);
1566 }
1567 }
1568
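 /* Illustrative sketch: translation only shifts the coordinate system.
 * \code
 * Halide::Runtime::Buffer<int> im(4, 4);
 * im.translate({10, 20});
 * // im.dim(0).min() == 10 and im.dim(1).min() == 20; im(10, 20) is the old im(0, 0).
 * \endcode
 */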
1569 /** Set the min coordinate of an image in the first N dimensions. */
1570 // @{
1571 void set_min(const std::vector<int> &mins) {
1572 assert(mins.size() <= static_cast<decltype(mins.size())>(dimensions()));
1573 device_deallocate();
1574 for (size_t i = 0; i < mins.size(); i++) {
1575 buf.dim[i].min = mins[i];
1576 }
1577 }
1578
1579 template<typename... Args>
1580 void set_min(Args... args) {
1581 set_min(std::vector<int>{args...});
1582 }
1583 // @}
1584
1585 /** Test if a given coordinate is within the bounds of an image. */
1586 // @{
1587 bool contains(const std::vector<int> &coords) const {
1588 assert(coords.size() <= static_cast<decltype(coords.size())>(dimensions()));
1589 for (size_t i = 0; i < coords.size(); i++) {
1590 if (coords[i] < dim((int)i).min() || coords[i] > dim((int)i).max()) {
1591 return false;
1592 }
1593 }
1594 return true;
1595 }
1596
1597 template<typename... Args>
1598 bool contains(Args... args) const {
1599 return contains(std::vector<int>{args...});
1600 }
1601 // @}
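 // [Editorial usage sketch — not part of HalideBuffer.h] set_min() and
 // contains(); the buffer name "im" is hypothetical.
 // \code
 // Halide::Runtime::Buffer<int> im(32, 32);
 // im.set_min(100, 200);                  // the first element is now im(100, 200)
 // bool inside = im.contains(110, 210);   // true
 // bool outside = im.contains(0, 0);      // false after the set_min above
 // \endcode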
1602
1603 /** Make a buffer which refers to the same data in the same layout
1604 * using a swapped indexing order for the dimensions given. So
1605 * A = B.transposed(0, 1) means that A(i, j) == B(j, i), and more
1606 * strongly that A.address_of(i, j) == B.address_of(j, i). */
1607 Buffer<T, Dims, InClassDimStorage> transposed(int d1, int d2) const {
1608 Buffer<T, Dims, InClassDimStorage> im = *this;
1609 im.transpose(d1, d2);
1610 return im;
1611 }
1612
1613 /** Transpose a buffer in-place by changing how it is indexed. For
1614 * example, transpose(0, 1) on a two-dimensional buffer means that
1615 * the value referred to by coordinates (i, j) is now reached at
1616 * the coordinates (j, i), and vice versa. This is done by
1617 * reordering the per-dimension metadata rather than by moving
1618 * data around in memory, so other views of the same memory will
1619 * not see the data as having been transposed. */
1620 void transpose(int d1, int d2) {
1621 assert(d1 >= 0 && d1 < this->dimensions());
1622 assert(d2 >= 0 && d2 < this->dimensions());
1623 std::swap(buf.dim[d1], buf.dim[d2]);
1624 }
1625
1626 /** A generalized transpose: instead of swapping two dimensions,
1627 * pass a vector that lists each dimension index exactly once, in
1628 * the desired order. This does not move any data around in memory
1629 * - it just permutes how it is indexed. */
1630 void transpose(const std::vector<int> &order) {
1631 assert((int)order.size() == dimensions());
1632 if (dimensions() < 2) {
1633 // My, that was easy
1634 return;
1635 }
1636
1637 std::vector<int> order_sorted = order;
1638 for (size_t i = 1; i < order_sorted.size(); i++) {
1639 for (size_t j = i; j > 0 && order_sorted[j - 1] > order_sorted[j]; j--) {
1640 std::swap(order_sorted[j], order_sorted[j - 1]);
1641 transpose(j, j - 1);
1642 }
1643 }
1644 }
1645
1646 /** Make a buffer which refers to the same data in the same
1647 * layout using a different ordering of the dimensions. */
1648 Buffer<T, Dims, InClassDimStorage> transposed(const std::vector<int> &order) const {
1649 Buffer<T, Dims, InClassDimStorage> im = *this;
1650 im.transpose(order);
1651 return im;
1652 }
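 // [Editorial usage sketch — not part of HalideBuffer.h] transpose()/
 // transposed(); the buffer name "planar" is hypothetical.
 // \code
 // Halide::Runtime::Buffer<float> planar(640, 480, 3);   // (x, y, c), x densest
 // auto t = planar.transposed(0, 2);                     // indexed (c, y, x)
 // assert(&t(0, 5, 7) == &planar(7, 5, 0));
 // planar.transpose({2, 0, 1});   // generalized permutation, metadata only
 // \endcode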
1653
1654 /** Make a lower-dimensional buffer that refers to one slice of
1655 * this buffer. */
1656 Buffer<T, (Dims == AnyDims ? AnyDims : Dims - 1)>
1657 sliced(int d, int pos) const {
1658 static_assert(Dims == AnyDims || Dims > 0, "Cannot slice a 0-dimensional buffer");
1659 assert(dimensions() > 0);
1660
1661 Buffer<T, AnyDims> im = *this;
1662
1663 // This guarantees the preexisting device ref is dropped if the
1664 // device_slice call fails and maintains the buffer in a consistent
1665 // state.
1666 im.device_deallocate();
1667
1668 im.slice_host(d, pos);
1669 if (buf.device_interface != nullptr) {
1670 complete_device_slice(im, d, pos);
1671 }
1672 return im;
1673 }
1674
1675 /** Make a lower-dimensional buffer that refers to one slice of this
1676 * buffer at the dimension's minimum. */
1677 Buffer<T, (Dims == AnyDims ? AnyDims : Dims - 1)>
1678 sliced(int d) const {
1679 static_assert(Dims == AnyDims || Dims > 0, "Cannot slice a 0-dimensional buffer");
1680 assert(dimensions() > 0);
1681
1682 return sliced(d, dim(d).min());
1683 }
1684
1685 /** Rewrite the buffer to refer to a single lower-dimensional
1686 * slice of itself along the given dimension at the given
1687 * coordinate. Does not move any data around or free the original
1688 * memory, so other views of the same data are unaffected. Can
1689 * only be called on a Buffer with dynamic dimensionality. */
1690 void slice(int d, int pos) {
1691 static_assert(Dims == AnyDims, "Cannot call slice() on a Buffer with static dimensionality.");
1692 assert(dimensions() > 0);
1693
1694 // An optimization for non-device buffers. For the device case,
1695 // a temp buffer is required, so reuse the not-in-place version.
1696 // TODO(zalman|abadams): Are nop slices common enough to special
1697 // case the device part of the if to do nothing?
1698 if (buf.device_interface != nullptr) {
1699 *this = sliced(d, pos);
1700 } else {
1701 slice_host(d, pos);
1702 }
1703 }
1704
1705 /** Slice a buffer in-place at the dimension's minimum. */
1706 void slice(int d) {
1707 slice(d, dim(d).min());
1708 }
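 // [Editorial usage sketch — not part of HalideBuffer.h] sliced()/slice();
 // the buffer name "vol" is hypothetical (its dimensionality is dynamic, so
 // the in-place slice() is legal).
 // \code
 // Halide::Runtime::Buffer<float> vol(64, 64, 16);
 // auto plane = vol.sliced(2, 5);            // 2-D view of the z == 5 plane
 // assert(&plane(3, 4) == &vol(3, 4, 5));
 // vol.slice(2, 5);                          // or rewrite vol itself in place
 // \endcode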
1709
1710 /** Make a new buffer that views this buffer as a single slice in a
1711 * higher-dimensional space. The new dimension has extent one and
1712 * the given min. This operation is the opposite of slice. As an
1713 * example, the following condition is true:
1714 *
1715 \code
1716 im2 = im.embedded(1, 17);
1717 &im(x, y, c) == &im2(x, 17, y, c);
1718 \endcode
1719 */
1720 Buffer<T, (Dims == AnyDims ? AnyDims : Dims + 1)>
1721 embedded(int d, int pos = 0) const {
1722 Buffer<T, AnyDims> im = *this;
1723 im.embed(d, pos);
1724 return im;
1725 }
1726
1727 /** Embed a buffer in-place, increasing the
1728 * dimensionality. */
1729 void embed(int d, int pos = 0) {
1730 static_assert(Dims == AnyDims, "Cannot call embed() on a Buffer with static dimensionality.");
1731 assert(d >= 0 && d <= dimensions());
1732 add_dimension();
1733 translate(dimensions() - 1, pos);
1734 for (int i = dimensions() - 1; i > d; i--) {
1735 transpose(i, i - 1);
1736 }
1737 }
1738
1739 /** Add a new dimension with a min of zero and an extent of
1740 * one. The stride is the extent of the outermost dimension times
1741 * its stride. The new dimension is the last dimension. This is a
1742 * special case of embed. */
1743 void add_dimension() {
1744 static_assert(Dims == AnyDims, "Cannot call add_dimension() on a Buffer with static dimensionality.");
1745 const int dims = buf.dimensions;
1746 buf.dimensions++;
1747 if (buf.dim != shape) {
1748 // We're already on the heap. Reallocate.
1749 halide_dimension_t *new_shape = new halide_dimension_t[buf.dimensions];
1750 for (int i = 0; i < dims; i++) {
1751 new_shape[i] = buf.dim[i];
1752 }
1753 delete[] buf.dim;
1754 buf.dim = new_shape;
1755 } else if (dims == InClassDimStorage) {
1756 // Transition from the in-class storage to the heap
1757 make_shape_storage(buf.dimensions);
1758 for (int i = 0; i < dims; i++) {
1759 buf.dim[i] = shape[i];
1760 }
1761 } else {
1762 // We still fit in the class
1763 }
1764 buf.dim[dims] = {0, 1, 0};
1765 if (dims == 0) {
1766 buf.dim[dims].stride = 1;
1767 } else {
1768 buf.dim[dims].stride = buf.dim[dims - 1].extent * buf.dim[dims - 1].stride;
1769 }
1770 }
1771
1772 /** Add a new dimension with a min of zero, an extent of one, and
1773 * the specified stride. The new dimension is the last
1774 * dimension. This is a special case of embed. */
1775 void add_dimension_with_stride(int s) {
1776 add_dimension();
1777 buf.dim[buf.dimensions - 1].stride = s;
1778 }
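 // [Editorial usage sketch — not part of HalideBuffer.h] embedded()/embed()/
 // add_dimension(); the buffer name "im" is hypothetical.
 // \code
 // Halide::Runtime::Buffer<float> im(100, 100);
 // auto im3 = im.embedded(2, 0);        // 100 x 100 x 1 view; new dimension is last
 // assert(&im(3, 4) == &im3(3, 4, 0));
 // im.add_dimension();                  // equivalent in-place special case
 // \endcode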
1779
1780 /** Methods for managing any GPU allocation. */
1781 // @{
1782 // Set the host dirty flag. Called by every operator()
1783 // access. Must be inlined so it can be hoisted out of loops.
1784 HALIDE_ALWAYS_INLINE
1785 void set_host_dirty(bool v = true) {
1786 assert((!v || !device_dirty()) && "Cannot set host dirty when device is already dirty. Call copy_to_host() before accessing the buffer from host.");
1787 buf.set_host_dirty(v);
1788 }
1789
1790 // Check if the device allocation is dirty. Called by
1791 // set_host_dirty, which is called by every accessor. Must be
1792 // inlined so it can be hoisted out of loops.
1793 HALIDE_ALWAYS_INLINE
1794 bool device_dirty() const {
1795 return buf.device_dirty();
1796 }
1797
1798 bool host_dirty() const {
1799 return buf.host_dirty();
1800 }
1801
1802 void set_device_dirty(bool v = true) {
1803 assert((!v || !host_dirty()) && "Cannot set device dirty when host is already dirty.");
1804 buf.set_device_dirty(v);
1805 }
1806
1807 int copy_to_host(void *ctx = nullptr) {
1808 if (device_dirty()) {
1809 return buf.device_interface->copy_to_host(ctx, &buf);
1810 }
1811 return halide_error_code_success;
1812 }
1813
1814 int copy_to_device(const struct halide_device_interface_t *device_interface, void *ctx = nullptr) {
1815 if (host_dirty()) {
1816 return device_interface->copy_to_device(ctx, &buf, device_interface);
1817 }
1818 return halide_error_code_success;
1819 }
1820
1821 int device_malloc(const struct halide_device_interface_t *device_interface, void *ctx = nullptr) {
1822 return device_interface->device_malloc(ctx, &buf, device_interface);
1823 }
1824
1825 int device_free(void *ctx = nullptr) {
1826 if (dev_ref_count) {
1827 assert(dev_ref_count->ownership == BufferDeviceOwnership::Allocated &&
1828 "Can't call device_free on an unmanaged or wrapped native device handle. "
1829 "Free the source allocation or call device_detach_native instead.");
1830 // Multiple people may be holding onto this dev field
1831 assert(dev_ref_count->count == 1 &&
1832 "Multiple Halide::Runtime::Buffer objects share this device "
1833 "allocation. Freeing it would create dangling references. "
1834 "Don't call device_free on Halide buffers that you have copied or "
1835 "passed by value.");
1836 }
1837 int ret = halide_error_code_success;
1838 if (buf.device_interface) {
1839 ret = buf.device_interface->device_free(ctx, &buf);
1840 }
1841 if (dev_ref_count) {
1842 delete dev_ref_count;
1843 dev_ref_count = nullptr;
1844 }
1845 return ret;
1846 }
1847
1848 int device_wrap_native(const struct halide_device_interface_t *device_interface,
1849 uint64_t handle, void *ctx = nullptr) {
1850 assert(device_interface);
1851 dev_ref_count = new DeviceRefCount;
1852 dev_ref_count->ownership = BufferDeviceOwnership::WrappedNative;
1853 return device_interface->wrap_native(ctx, &buf, handle, device_interface);
1854 }
1855
1856 int device_detach_native(void *ctx = nullptr) {
1857 assert(dev_ref_count &&
1859 "Only call device_detach_native on buffers wrapping a native "
1860 "device handle via device_wrap_native. This buffer was allocated "
1861 "using device_malloc, or is unmanaged. "
1862 "Call device_free or free the original allocation instead.");
1863 // Multiple people may be holding onto this dev field
1864 assert(dev_ref_count->count == 1 &&
1865 "Multiple Halide::Runtime::Buffer objects share this device "
1866 "allocation. Freeing it could create dangling references. "
1867 "Don't call device_detach_native on Halide buffers that you "
1868 "have copied or passed by value.");
1869 int ret = halide_error_code_success;
1870 if (buf.device_interface) {
1871 ret = buf.device_interface->detach_native(ctx, &buf);
1872 }
1873 delete dev_ref_count;
1874 dev_ref_count = nullptr;
1875 return ret;
1876 }
1877
1878 int device_and_host_malloc(const struct halide_device_interface_t *device_interface, void *ctx = nullptr) {
1879 return device_interface->device_and_host_malloc(ctx, &buf, device_interface);
1880 }
1881
1882 int device_and_host_free(const struct halide_device_interface_t *device_interface, void *ctx = nullptr) {
1883 if (dev_ref_count) {
1885 "Can't call device_and_host_free on a device handle not allocated with device_and_host_malloc. "
1886 "Free the source allocation or call device_detach_native instead.");
1887 // Multiple people may be holding onto this dev field
1888 assert(dev_ref_count->count == 1 &&
1889 "Multiple Halide::Runtime::Buffer objects share this device "
1890 "allocation. Freeing it would create dangling references. "
1891 "Don't call device_and_host_free on Halide buffers that you have copied or "
1892 "passed by value.");
1893 }
1894 int ret = halide_error_code_success;
1895 if (buf.device_interface) {
1896 ret = buf.device_interface->device_and_host_free(ctx, &buf);
1897 }
1898 if (dev_ref_count) {
1899 delete dev_ref_count;
1900 dev_ref_count = nullptr;
1901 }
1902 return ret;
1903 }
1904
1905 int device_sync(void *ctx = nullptr) {
1906 return buf.device_sync(ctx);
1907 }
1908
1909 bool has_device_allocation() const {
1910 return buf.device != 0;
1911 }
1912
1913 /** Return the method by which the device field is managed. */
1914 BufferDeviceOwnership device_ownership() const {
1915 if (dev_ref_count == nullptr) {
1916 return BufferDeviceOwnership::Allocated;
1917 }
1918 return dev_ref_count->ownership;
1919 }
1920 // @}
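 // [Editorial usage sketch — not part of HalideBuffer.h] Typical host/device
 // copy flow. Assumes a CUDA-enabled runtime; halide_cuda_device_interface()
 // is declared in HalideRuntimeCuda.h.
 // \code
 // Halide::Runtime::Buffer<float> buf(1024, 1024);
 // buf.fill(0.0f);                                       // marks the host dirty
 // int err = buf.copy_to_device(halide_cuda_device_interface());
 // // ... run a GPU pipeline that writes buf and marks the device dirty ...
 // if (buf.device_dirty()) {
 //     err = buf.copy_to_host();   // bring results back before the host reads
 // }
 // \endcode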
1921
1922 /** If you use the (x, y, c) indexing convention, then Halide
1923 * Buffers are stored planar by default. This function constructs
1924 * an interleaved RGB or RGBA image that can still be indexed
1925 * using (x, y, c). Passing it to a generator requires that the
1926 * generator has been compiled with support for interleaved (also
1927 * known as packed or chunky) memory layouts. */
1928 static Buffer<void, Dims, InClassDimStorage> make_interleaved(halide_type_t t, int width, int height, int channels) {
1929 static_assert(Dims == AnyDims || Dims == 3, "make_interleaved() must be called on a Buffer that can represent 3 dimensions.");
1930 Buffer<void, Dims, InClassDimStorage> im(t, channels, width, height);
1931 // Note that this is equivalent to calling transpose({2, 0, 1}),
1932 // but slightly more efficient.
1933 im.transpose(0, 1);
1934 im.transpose(1, 2);
1935 return im;
1936 }
1937
1938 /** If you use the (x, y, c) indexing convention, then Halide
1939 * Buffers are stored planar by default. This function constructs
1940 * an interleaved RGB or RGBA image that can still be indexed
1941 * using (x, y, c). Passing it to a generator requires that the
1942 * generator has been compiled with support for interleaved (also
1943 * known as packed or chunky) memory layouts. */
1944 static Buffer<T, Dims, InClassDimStorage> make_interleaved(int width, int height, int channels) {
1945 return make_interleaved(static_halide_type(), width, height, channels);
1946 }
1947
1948 /** Wrap an existing interleaved image. */
1949 static Buffer<add_const_if_T_is_const<void>, Dims, InClassDimStorage>
1950 make_interleaved(halide_type_t t, T *data, int width, int height, int channels) {
1951 static_assert(Dims == AnyDims || Dims == 3, "make_interleaved() must be called on a Buffer that can represent 3 dimensions.");
1952 Buffer<add_const_if_T_is_const<void>, Dims, InClassDimStorage> im(t, data, channels, width, height);
1953 im.transpose(0, 1);
1954 im.transpose(1, 2);
1955 return im;
1956 }
1957
1958 /** Wrap an existing interleaved image. */
1959 static Buffer<T, Dims, InClassDimStorage> make_interleaved(T *data, int width, int height, int channels) {
1960 return make_interleaved(static_halide_type(), data, width, height, channels);
1961 }
1962
1963 /** Make a zero-dimensional Buffer */
1964 static Buffer<add_const_if_T_is_const<void>, Dims, InClassDimStorage> make_scalar(halide_type_t t) {
1965 static_assert(Dims == AnyDims || Dims == 0, "make_scalar() must be called on a Buffer that can represent 0 dimensions.");
1966 Buffer<add_const_if_T_is_const<void>, AnyDims, InClassDimStorage> buf(t, 1);
1967 buf.slice(0, 0);
1968 return buf;
1969 }
1970
1971 /** Make a zero-dimensional Buffer */
1972 static Buffer<T, Dims, InClassDimStorage> make_scalar() {
1973 static_assert(Dims == AnyDims || Dims == 0, "make_scalar() must be called on a Buffer that can represent 0 dimensions.");
1974 Buffer<T, AnyDims, InClassDimStorage> buf(1);
1975 buf.slice(0, 0);
1976 return buf;
1977 }
1978
1979 /** Make a zero-dimensional Buffer that points to non-owned, existing data */
1980 static Buffer<T, Dims, InClassDimStorage> make_scalar(T *data) {
1981 static_assert(Dims == AnyDims || Dims == 0, "make_scalar() must be called on a Buffer that can represent 0 dimensions.");
1982 Buffer<T, AnyDims, InClassDimStorage> buf(data, 1);
1983 buf.slice(0, 0);
1984 return buf;
1985 }
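 // [Editorial usage sketch — not part of HalideBuffer.h] make_interleaved()
 // and make_scalar().
 // \code
 // auto rgb = Halide::Runtime::Buffer<uint8_t>::make_interleaved(640, 480, 3);
 // // rgb.dim(0).stride() == 3 and rgb.dim(2).stride() == 1 (chunky layout)
 // auto s = Halide::Runtime::Buffer<float>::make_scalar();
 // s() = 3.5f;
 // \endcode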
1986
1987 /** Make a buffer with the same shape and memory nesting order as
1988 * another buffer. It may have a different type. */
1989 template<typename T2, int D2, int S2>
1990 static Buffer<T, Dims, InClassDimStorage> make_with_shape_of(Buffer<T2, D2, S2> src,
1991 void *(*allocate_fn)(size_t) = nullptr,
1992 void (*deallocate_fn)(void *) = nullptr) {
1993 static_assert(Dims == D2 || Dims == AnyDims);
1994 const halide_type_t dst_type = T_is_void ? src.type() : halide_type_of<typename std::remove_cv<not_void_T>::type>();
1995 return Buffer<>::make_with_shape_of_helper(dst_type, src.dimensions(), src.buf.dim,
1996 allocate_fn, deallocate_fn);
1997 }
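 // [Editorial usage sketch — not part of HalideBuffer.h] make_with_shape_of()
 // copies the shape and memory nesting order, but not the contents or type.
 // \code
 // auto src = Halide::Runtime::Buffer<uint8_t>::make_interleaved(640, 480, 3);
 // auto dst = Halide::Runtime::Buffer<float>::make_with_shape_of(src);
 // // dst is a freshly allocated 640 x 480 x 3 float buffer, also interleaved.
 // \endcode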
1998
1999private:
2000 static Buffer<> make_with_shape_of_helper(halide_type_t dst_type,
2001 int dimensions,
2002 halide_dimension_t *shape,
2003 void *(*allocate_fn)(size_t),
2004 void (*deallocate_fn)(void *)) {
2005 // Reorder the dimensions of src to have strides in increasing order
2006 std::vector<int> swaps;
2007 for (int i = dimensions - 1; i > 0; i--) {
2008 for (int j = i; j > 0; j--) {
2009 if (shape[j - 1].stride > shape[j].stride) {
2010 std::swap(shape[j - 1], shape[j]);
2011 swaps.push_back(j);
2012 }
2013 }
2014 }
2015
2016 // Rewrite the strides to be dense (this messes up src, which
2017 // is why we took it by value).
2018 for (int i = 0; i < dimensions; i++) {
2019 if (i == 0) {
2020 shape[i].stride = 1;
2021 } else {
2022 shape[i].stride = shape[i - 1].extent * shape[i - 1].stride;
2023 }
2024 }
2025
2026 // Undo the dimension reordering
2027 while (!swaps.empty()) {
2028 int j = swaps.back();
2029 std::swap(shape[j - 1], shape[j]);
2030 swaps.pop_back();
2031 }
2032
2033 // Use an explicit runtime type, and make dst a Buffer<void>, to allow
2034 // using this method with Buffer<void> for either src or dst.
2035 Buffer<> dst(dst_type, nullptr, dimensions, shape);
2036 dst.allocate(allocate_fn, deallocate_fn);
2037
2038 return dst;
2039 }
2040
2041 template<typename... Args>
2042 HALIDE_ALWAYS_INLINE
2043 ptrdiff_t
2044 offset_of(int d, int first, Args... rest) const {
2045#if HALIDE_RUNTIME_BUFFER_CHECK_INDICES
2046 assert(first >= this->buf.dim[d].min);
2047 assert(first < this->buf.dim[d].min + this->buf.dim[d].extent);
2048#endif
2049 return offset_of(d + 1, rest...) + (ptrdiff_t)this->buf.dim[d].stride * (first - this->buf.dim[d].min);
2050 }
2051
2052 HALIDE_ALWAYS_INLINE
2053 ptrdiff_t offset_of(int d) const {
2054 return 0;
2055 }
2056
2057 template<typename... Args>
2058 HALIDE_ALWAYS_INLINE
2059 storage_T *
2060 address_of(Args... args) const {
2061 if (T_is_void) {
2062 return (storage_T *)(this->buf.host) + offset_of(0, args...) * type().bytes();
2063 } else {
2064 return (storage_T *)(this->buf.host) + offset_of(0, args...);
2065 }
2066 }
2067
2068 HALIDE_ALWAYS_INLINE
2069 ptrdiff_t offset_of(const int *pos) const {
2070 ptrdiff_t offset = 0;
2071 for (int i = this->dimensions() - 1; i >= 0; i--) {
2072#if HALIDE_RUNTIME_BUFFER_CHECK_INDICES
2073 assert(pos[i] >= this->buf.dim[i].min);
2074 assert(pos[i] < this->buf.dim[i].min + this->buf.dim[i].extent);
2075#endif
2076 offset += (ptrdiff_t)this->buf.dim[i].stride * (pos[i] - this->buf.dim[i].min);
2077 }
2078 return offset;
2079 }
2080
2081 HALIDE_ALWAYS_INLINE
2082 storage_T *address_of(const int *pos) const {
2083 if (T_is_void) {
2084 return (storage_T *)this->buf.host + offset_of(pos) * type().bytes();
2085 } else {
2086 return (storage_T *)this->buf.host + offset_of(pos);
2087 }
2088 }
2089
2090public:
2091 /** Get a pointer to the address of the min coordinate. */
2092 T *data() const {
2093 return (T *)(this->buf.host);
2094 }
2095
2096 /** Access elements. Use im(...) to get a reference to an element,
2097 * and use &im(...) to get the address of an element. If you pass
2098 * fewer arguments than the buffer has dimensions, the rest are
2099 * treated as their min coordinate. The non-const versions set the
2100 * host_dirty flag to true.
2101 */
2102 //@{
2103 template<typename... Args,
2104 typename = typename std::enable_if<AllInts<Args...>::value>::type>
2105 HALIDE_ALWAYS_INLINE const not_void_T &operator()(int first, Args... rest) const {
2106 static_assert(!T_is_void,
2107 "Cannot use operator() on Buffer<void> types");
2108 constexpr int expected_dims = 1 + (int)(sizeof...(rest));
2109 static_assert(Dims == AnyDims || Dims == expected_dims, "Buffer with static dimensions was accessed with the wrong number of coordinates in operator()");
2110 assert(!device_dirty());
2111 return *((const not_void_T *)(address_of(first, rest...)));
2112 }
2113
2115 const not_void_T &
2116 operator()() const {
2117 static_assert(!T_is_void,
2118 "Cannot use operator() on Buffer<void> types");
2119 constexpr int expected_dims = 0;
2120 static_assert(Dims == AnyDims || Dims == expected_dims, "Buffer with static dimensions was accessed with the wrong number of coordinates in operator()");
2121 assert(!device_dirty());
2122 return *((const not_void_T *)(data()));
2123 }
2124
2125 HALIDE_ALWAYS_INLINE
2126 const not_void_T &
2127 operator()(const int *pos) const {
2128 static_assert(!T_is_void,
2129 "Cannot use operator() on Buffer<void> types");
2130 assert(!device_dirty());
2131 return *((const not_void_T *)(address_of(pos)));
2132 }
2133
2134 template<typename... Args,
2135 typename = typename std::enable_if<AllInts<Args...>::value>::type>
2136 HALIDE_ALWAYS_INLINE
2137 not_void_T &
2138 operator()(int first, Args... rest) {
2139 static_assert(!T_is_void,
2140 "Cannot use operator() on Buffer<void> types");
2141 constexpr int expected_dims = 1 + (int)(sizeof...(rest));
2142 static_assert(Dims == AnyDims || Dims == expected_dims, "Buffer with static dimensions was accessed with the wrong number of coordinates in operator()");
2143 set_host_dirty();
2144 return *((not_void_T *)(address_of(first, rest...)));
2145 }
2146
2147 HALIDE_ALWAYS_INLINE
2148 not_void_T &
2149 operator()() {
2150 static_assert(!T_is_void,
2151 "Cannot use operator() on Buffer<void> types");
2152 constexpr int expected_dims = 0;
2153 static_assert(Dims == AnyDims || Dims == expected_dims, "Buffer with static dimensions was accessed with the wrong number of coordinates in operator()");
2154 set_host_dirty();
2155 return *((not_void_T *)(data()));
2156 }
2157
2158 HALIDE_ALWAYS_INLINE
2159 not_void_T &
2160 operator()(const int *pos) {
2161 static_assert(!T_is_void,
2162 "Cannot use operator() on Buffer<void> types");
2163 set_host_dirty();
2164 return *((not_void_T *)(address_of(pos)));
2165 }
2166 // @}
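 // [Editorial usage sketch — not part of HalideBuffer.h] Element access with
 // operator(); the buffer name "im" is hypothetical.
 // \code
 // Halide::Runtime::Buffer<float> im(8, 8);
 // im(3, 4) = 1.5f;             // non-const access sets host_dirty
 // float v = im(3, 4);
 // const int pos[] = {3, 4};
 // float w = im(pos);           // coordinate-array form
 // float *p = &im(3, 4);        // address of an element
 // \endcode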
2167
2168 /** Tests that all values in this buffer are equal to val. */
2169 bool all_equal(not_void_T val) const {
2170 bool all_equal = true;
2171 for_each_element([&](const int *pos) { all_equal &= (*this)(pos) == val; });
2172 return all_equal;
2173 }
2174
2175 Buffer<T, Dims, InClassDimStorage> &fill(not_void_T val) {
2176 set_host_dirty();
2177 for_each_value([=](T &v) { v = val; });
2178 return *this;
2179 }
2180
2181private:
2182 /** Helper functions for for_each_value. */
2183 // @{
2184 template<int N>
2185 struct for_each_value_task_dim {
2186 std::ptrdiff_t extent;
2187 std::ptrdiff_t stride[N];
2188 };
2189
2190 // Given an array of strides, and a bunch of pointers to pointers
2191 // (all of different types), advance the pointers using the
2192 // strides.
2193 template<typename Ptr, typename... Ptrs>
2194 HALIDE_ALWAYS_INLINE static void advance_ptrs(const std::ptrdiff_t *stride, Ptr &ptr, Ptrs &...ptrs) {
2195 ptr += *stride;
2196 advance_ptrs(stride + 1, ptrs...);
2197 }
2198
2199 HALIDE_ALWAYS_INLINE
2200 static void advance_ptrs(const std::ptrdiff_t *) {
2201 }
2202
2203 template<typename Fn, typename Ptr, typename... Ptrs>
2204 HALIDE_NEVER_INLINE static void for_each_value_helper(Fn &&f, int d, bool innermost_strides_are_one,
2205 const for_each_value_task_dim<sizeof...(Ptrs) + 1> *t, Ptr ptr, Ptrs... ptrs) {
2206 if (d == 0) {
2207 if (innermost_strides_are_one) {
2208 Ptr end = ptr + t[0].extent;
2209 while (ptr != end) {
2210 f(*ptr++, (*ptrs++)...);
2211 }
2212 } else {
2213 for (std::ptrdiff_t i = t[0].extent; i != 0; i--) {
2214 f(*ptr, (*ptrs)...);
2215 advance_ptrs(t[0].stride, ptr, ptrs...);
2216 }
2217 }
2218 } else {
2219 for (std::ptrdiff_t i = t[d].extent; i != 0; i--) {
2220 for_each_value_helper(f, d - 1, innermost_strides_are_one, t, ptr, ptrs...);
2221 advance_ptrs(t[d].stride, ptr, ptrs...);
2222 }
2223 }
2224 }
2225
2226 // Return pair is <new_dimensions, innermost_strides_are_one>
2227 template<int N>
2228 HALIDE_NEVER_INLINE static std::pair<int, bool> for_each_value_prep(for_each_value_task_dim<N> *t,
2229 const halide_buffer_t **buffers) {
2230 const int dimensions = buffers[0]->dimensions;
2231 assert(dimensions > 0);
2232
2233 // Check the buffers all have clean host allocations
2234 for (int i = 0; i < N; i++) {
2235 if (buffers[i]->device) {
2236 assert(buffers[i]->host &&
2237 "Buffer passed to for_each_value has device allocation but no host allocation. Call allocate() and copy_to_host() first");
2238 assert(!buffers[i]->device_dirty() &&
2239 "Buffer passed to for_each_value is dirty on device. Call copy_to_host() first");
2240 } else {
2241 assert(buffers[i]->host &&
2242 "Buffer passed to for_each_value has no host or device allocation");
2243 }
2244 }
2245
2246 // Extract the strides in all the dimensions
2247 for (int i = 0; i < dimensions; i++) {
2248 for (int j = 0; j < N; j++) {
2249 assert(buffers[j]->dimensions == dimensions);
2250 assert(buffers[j]->dim[i].extent == buffers[0]->dim[i].extent &&
2251 buffers[j]->dim[i].min == buffers[0]->dim[i].min);
2252 const int s = buffers[j]->dim[i].stride;
2253 t[i].stride[j] = s;
2254 }
2255 t[i].extent = buffers[0]->dim[i].extent;
2256
2257 // Order the dimensions by stride, so that the traversal is cache-coherent.
2258 // Use the strides of the last buffer for this, because in copies that buffer is the source.
2259 // It appears to be better to optimize read order than write order.
2260 for (int j = i; j > 0 && t[j].stride[N - 1] < t[j - 1].stride[N - 1]; j--) {
2261 std::swap(t[j], t[j - 1]);
2262 }
2263 }
2264
2265 // flatten dimensions where possible to make a larger inner
2266 // loop for autovectorization.
2267 int d = dimensions;
2268 for (int i = 1; i < d; i++) {
2269 bool flat = true;
2270 for (int j = 0; j < N; j++) {
2271 flat = flat && t[i - 1].stride[j] * t[i - 1].extent == t[i].stride[j];
2272 }
2273 if (flat) {
2274 t[i - 1].extent *= t[i].extent;
2275 for (int j = i; j < d - 1; j++) {
2276 t[j] = t[j + 1];
2277 }
2278 i--;
2279 d--;
2280 }
2281 }
2282
2283 // Note that we assert() that dimensions > 0 above
2284 // (our one-and-only caller will only call us that way)
2285 // so the unchecked access to t[0] should be safe.
2286 bool innermost_strides_are_one = true;
2287 for (int i = 0; i < N; i++) {
2288 innermost_strides_are_one &= (t[0].stride[i] == 1);
2289 }
2290
2291 return {d, innermost_strides_are_one};
2292 }
2293
2294 template<typename Fn, typename... Args, int N = sizeof...(Args) + 1>
2295 void for_each_value_impl(Fn &&f, Args &&...other_buffers) const {
2296 if (dimensions() > 0) {
2297 const size_t alloc_size = dimensions() * sizeof(for_each_value_task_dim<N>);
2298 Buffer<>::for_each_value_task_dim<N> *t =
2299 (Buffer<>::for_each_value_task_dim<N> *)HALIDE_ALLOCA(alloc_size);
2300 // Move the preparatory code into a non-templated helper to
2301 // save code size.
2302 const halide_buffer_t *buffers[] = {&buf, (&other_buffers.buf)...};
2303 auto [new_dims, innermost_strides_are_one] = Buffer<>::for_each_value_prep(t, buffers);
2304 if (new_dims > 0) {
2305 Buffer<>::for_each_value_helper(f, new_dims - 1,
2306 innermost_strides_are_one,
2307 t,
2308 data(), (other_buffers.data())...);
2309 return;
2310 }
2311 // else fall thru
2312 }
2313
2314 // zero-dimensional case
2315 f(*data(), (*other_buffers.data())...);
2316 }
2317 // @}
2318
2319public:
2320 /** Call a function on every value in the buffer, and the
2321 * corresponding values in some number of other buffers of the
2322 * same size. The function should take a reference, const
2323 * reference, or value of the correct type for each buffer. This
2324 * effectively lifts a function of scalars to an element-wise
2325 * function of buffers. This produces code that the compiler can
2326 * autovectorize. This is slightly cheaper than for_each_element,
2327 * because it does not need to track the coordinates.
2328 *
2329 * Note that constness of Buffers is preserved: a const Buffer<T> (for either
2330 * 'this' or the other-buffers arguments) will allow mutation of the
2331 * buffer contents, while a Buffer<const T> will not. Attempting to specify
2332 * a mutable reference for the lambda argument of a Buffer<const T>
2333 * will result in a compilation error. */
2334 // @{
2335 template<typename Fn, typename... Args, int N = sizeof...(Args) + 1>
2336 HALIDE_ALWAYS_INLINE const Buffer<T, Dims, InClassDimStorage> &for_each_value(Fn &&f, Args &&...other_buffers) const {
2337 for_each_value_impl(f, std::forward<Args>(other_buffers)...);
2338 return *this;
2339 }
2340
2341 template<typename Fn, typename... Args, int N = sizeof...(Args) + 1>
2342 HALIDE_ALWAYS_INLINE
2343 Buffer<T, Dims, InClassDimStorage> &
2344 for_each_value(Fn &&f, Args &&...other_buffers) {
2345 for_each_value_impl(f, std::forward<Args>(other_buffers)...);
2346 return *this;
2347 }
2348 // @}
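 // [Editorial usage sketch — not part of HalideBuffer.h] Lifting a scalar
 // function to an element-wise operation over two buffers; names hypothetical.
 // \code
 // Halide::Runtime::Buffer<float> a(100, 100), b(100, 100);
 // a.fill(1.0f);
 // b.fill(2.0f);
 // // Element-wise a += b; the lambda receives one value from each buffer.
 // a.for_each_value([](float &av, float bv) { av += bv; }, b);
 // \endcode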
2349
2350private:
2351 // Helper functions for for_each_element
2352 struct for_each_element_task_dim {
2353 int min, max;
2354 };
2355
2356 /** If f is callable with this many args, call it. The first
2357 * argument is just to make the overloads distinct. Actual
2358 * overload selection is done using the enable_if. */
2359 template<typename Fn,
2360 typename... Args,
2361 typename = decltype(std::declval<Fn>()(std::declval<Args>()...))>
2362 HALIDE_ALWAYS_INLINE static void for_each_element_variadic(int, int, const for_each_element_task_dim *, Fn &&f, Args... args) {
2363 f(args...);
2364 }
2365
2366 /** If the above overload is impossible, we add an outer loop over
2367 * an additional argument and try again. */
2368 template<typename Fn,
2369 typename... Args>
2370 HALIDE_ALWAYS_INLINE static void for_each_element_variadic(double, int d, const for_each_element_task_dim *t, Fn &&f, Args... args) {
2371 for (int i = t[d].min; i <= t[d].max; i++) {
2372 for_each_element_variadic(0, d - 1, t, std::forward<Fn>(f), i, args...);
2373 }
2374 }
2375
2376 /** Determine the minimum number of arguments a callable can take
2377 * using the same trick. */
2378 template<typename Fn,
2379 typename... Args,
2380 typename = decltype(std::declval<Fn>()(std::declval<Args>()...))>
2381 HALIDE_ALWAYS_INLINE static int num_args(int, Fn &&, Args...) {
2382 return (int)(sizeof...(Args));
2383 }
2384
2385 /** The recursive version is only enabled up to a recursion limit
2386 * of 256. This catches callables that aren't callable with any
2387 * number of ints. */
2388 template<typename Fn,
2389 typename... Args>
2390 HALIDE_ALWAYS_INLINE static int num_args(double, Fn &&f, Args... args) {
2391 static_assert(sizeof...(args) <= 256,
2392 "Callable passed to for_each_element must accept either a const int *,"
2393 " or up to 256 ints. No such operator found. Expect infinite template recursion.");
2394 return num_args(0, std::forward<Fn>(f), 0, args...);
2395 }
2396
2397 /** A version where the callable takes a position array instead,
2398 * with compile-time recursion on the dimensionality. This
2399 * overload is preferred to the one below using the same int vs
2400 * double trick as above, but is impossible once d hits -1 using
2401 * std::enable_if. */
2402 template<int d,
2403 typename Fn,
2404 typename = typename std::enable_if<(d >= 0)>::type>
2405 HALIDE_ALWAYS_INLINE static void for_each_element_array_helper(int, const for_each_element_task_dim *t, Fn &&f, int *pos) {
2406 for (pos[d] = t[d].min; pos[d] <= t[d].max; pos[d]++) {
2407 for_each_element_array_helper<d - 1>(0, t, std::forward<Fn>(f), pos);
2408 }
2409 }
2410
2411 /** Base case for recursion above. */
2412 template<int d,
2413 typename Fn,
2414 typename = typename std::enable_if<(d < 0)>::type>
2415 HALIDE_ALWAYS_INLINE static void for_each_element_array_helper(double, const for_each_element_task_dim *t, Fn &&f, int *pos) {
2416 f(pos);
2417 }
2418
2419 /** A run-time-recursive version (instead of
2420 * compile-time-recursive) that requires the callable to take a
2421 * pointer to a position array instead. Dispatches to the
2422 * compile-time-recursive version once the dimensionality gets
2423 * small. */
2424 template<typename Fn>
2425 static void for_each_element_array(int d, const for_each_element_task_dim *t, Fn &&f, int *pos) {
2426 if (d == -1) {
2427 f(pos);
2428 } else if (d == 0) {
2429 // Once the dimensionality gets small enough, dispatch to
2430 // a compile-time-recursive version for better codegen of
2431 // the inner loops.
2432 for_each_element_array_helper<0, Fn>(0, t, std::forward<Fn>(f), pos);
2433 } else if (d == 1) {
2434 for_each_element_array_helper<1, Fn>(0, t, std::forward<Fn>(f), pos);
2435 } else if (d == 2) {
2436 for_each_element_array_helper<2, Fn>(0, t, std::forward<Fn>(f), pos);
2437 } else if (d == 3) {
2438 for_each_element_array_helper<3, Fn>(0, t, std::forward<Fn>(f), pos);
2439 } else {
2440 for (pos[d] = t[d].min; pos[d] <= t[d].max; pos[d]++) {
2441 for_each_element_array(d - 1, t, std::forward<Fn>(f), pos);
2442 }
2443 }
2444 }
2445
2446 /** We now have two overloads for for_each_element. This one
2447 * triggers if the callable takes a const int *.
2448 */
2449 template<typename Fn,
2450 typename = decltype(std::declval<Fn>()((const int *)nullptr))>
2451 static void for_each_element(int, int dims, const for_each_element_task_dim *t, Fn &&f, int check = 0) {
2452 const int size = dims * sizeof(int);
2453 int *pos = (int *)HALIDE_ALLOCA(size);
2454 // At least one version of GCC will (incorrectly) report that pos "may be used uninitialized".
2455 // Add this memset to silence it.
2456 memset(pos, 0, size);
2457 for_each_element_array(dims - 1, t, std::forward<Fn>(f), pos);
2458 }
2459
2460 /** This one triggers otherwise. It treats the callable as
2461 * something that takes some number of ints. */
2462 template<typename Fn>
2463 HALIDE_ALWAYS_INLINE static void for_each_element(double, int dims, const for_each_element_task_dim *t, Fn &&f) {
2464 int args = num_args(0, std::forward<Fn>(f));
2465 assert(dims >= args);
2466 for_each_element_variadic(0, args - 1, t, std::forward<Fn>(f));
2467 }
2468
2469 template<typename Fn>
2470 void for_each_element_impl(Fn &&f) const {
2471 for_each_element_task_dim *t =
2472 (for_each_element_task_dim *)HALIDE_ALLOCA(dimensions() * sizeof(for_each_element_task_dim));
2473 for (int i = 0; i < dimensions(); i++) {
2474 t[i].min = dim(i).min();
2475 t[i].max = dim(i).max();
2476 }
2477 for_each_element(0, dimensions(), t, std::forward<Fn>(f));
2478 }
2479
2480public:
2481 /** Call a function at each site in a buffer. This is likely to be
2482 * much slower than using Halide code to populate a buffer, but is
2483 * convenient for tests. If the function has more arguments than the
2484 * buffer has dimensions, the remaining arguments will be zero. If it
2485 * has fewer arguments than the buffer has dimensions then the last
2486 * few dimensions of the buffer are not iterated over. For example,
2487 * the following code exploits this to set a floating point RGB image
2488 * to red:
2489
2490 \code
2491 Buffer<float, 3> im(100, 100, 3);
2492 im.for_each_element([&](int x, int y) {
2493 im(x, y, 0) = 1.0f;
2494 im(x, y, 1) = 0.0f;
2495 im(x, y, 2) = 0.0f;
2496 });
2497 \endcode
2498
2499 * The compiled code is equivalent to writing a nested for loop,
2500 * and compilers are capable of optimizing it in the same way.
2501 *
2502 * If the callable can be called with an int * as the sole argument,
2503 * that version is called instead. Each location in the buffer is
2504 * passed to it in a coordinate array. This version is higher-overhead
2505 * than the variadic version, but is useful for writing generic code
2506 * that accepts buffers of arbitrary dimensionality. For example, the
2507 * following sets the value at all sites in an arbitrary-dimensional
2508 * buffer to their first coordinate:
2509
2510 \code
2511 im.for_each_element([&](const int *pos) {im(pos) = pos[0];});
2512 \endcode
2513
2514 * It is also possible to use for_each_element to iterate over entire
2515 * rows or columns by cropping the buffer to a single column or row
2516 * respectively and iterating over elements of the result. For example,
2517 * to set the diagonal of the image to 1 by iterating over the columns:
2518
2519 \code
2520 Buffer<float, 3> im(100, 100, 3);
2521 im.sliced(1, 0).for_each_element([&](int x, int c) {
2522 im(x, x, c) = 1.0f;
2523 });
2524 \endcode
2525
2526 * Or, assuming the memory layout is known to be dense per row, one can
2527 * memset each row of an image like so:
2528
2529 \code
2530 Buffer<float, 3> im(100, 100, 3);
2531 im.sliced(0, 0).for_each_element([&](int y, int c) {
2532 memset(&im(0, y, c), 0, sizeof(float) * im.width());
2533 });
2534 \endcode
2535
2536 */
2537 // @{
2538 template<typename Fn>
2539 HALIDE_ALWAYS_INLINE const Buffer<T, Dims, InClassDimStorage> &for_each_element(Fn &&f) const {
2540 for_each_element_impl(f);
2541 return *this;
2542 }
2543
2544 template<typename Fn>
2545 HALIDE_ALWAYS_INLINE
2546 Buffer<T, Dims, InClassDimStorage> &
2547 for_each_element(Fn &&f) {
2548 for_each_element_impl(f);
2549 return *this;
2550 }
2551 // @}
2552
2553private:
2554 template<typename Fn>
2555 struct FillHelper {
2556 Fn f;
2557 Buffer<T, Dims, InClassDimStorage> *buf;
2558
2559 template<typename... Args,
2560 typename = decltype(std::declval<Fn>()(std::declval<Args>()...))>
2561 void operator()(Args... args) {
2562 (*buf)(args...) = f(args...);
2563 }
2564
2565 FillHelper(Fn &&f, Buffer<T, Dims, InClassDimStorage> *buf)
2566 : f(std::forward<Fn>(f)), buf(buf) {
2567 }
2568 };
2569
2570public:
2571 /** Fill a buffer by evaluating a callable at every site. The
2572 * callable should look much like a callable passed to
2573 * for_each_element, but it should return the value that should be
2574 * stored to the coordinate corresponding to the arguments. */
2575 template<typename Fn,
2576 typename = typename std::enable_if<!std::is_arithmetic<typename std::decay<Fn>::type>::value>::type>
2577 Buffer<T, Dims, InClassDimStorage> &fill(Fn &&f) {
2578 // We'll go via for_each_element. We need a variadic wrapper lambda.
2579 FillHelper<Fn> wrapper(std::forward<Fn>(f), this);
2580 return for_each_element(wrapper);
2581 }
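 // [Editorial usage sketch — not part of HalideBuffer.h] Filling a buffer from
 // a function of the coordinates; the buffer name "im" is hypothetical.
 // \code
 // Halide::Runtime::Buffer<float> im(16, 16);
 // im.fill([](int x, int y) { return float(x + y); });
 // \endcode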
2582
2583 /** Check whether an input buffer passed to an extern stage is a
2584 * bounds query. Compared to checking the host pointer directly,
2585 * this adds clarity to the code and will make it easier to move to
2586 * another representation for bounds query arguments. */
2587 bool is_bounds_query() const {
2588 return buf.is_bounds_query();
2589 }
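 // [Editorial usage sketch — not part of HalideBuffer.h] Handling a bounds
 // query in an extern stage (see Func::define_extern). The stage name and the
 // policy of requesting the same region as the output are illustrative
 // assumptions, not a fixed convention.
 // \code
 // extern "C" int my_extern_stage(halide_buffer_t *in, halide_buffer_t *out) {
 //     Halide::Runtime::Buffer<float> input(*in);
 //     if (input.is_bounds_query()) {
 //         for (int d = 0; d < in->dimensions; d++) {
 //             in->dim[d] = out->dim[d];   // request the same region as the output
 //         }
 //         return 0;
 //     }
 //     // ... normal processing using input and out ...
 //     return 0;
 // }
 // \endcode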
2590
2591 /** Convenient check to verify that all of the interesting bytes in the Buffer
2592 * are initialized under MSAN. Note that by default, we use for_each_value() here so that
2593 * we skip any unused padding that isn't part of the Buffer; this isn't efficient,
2594 * but in MSAN mode, it doesn't matter. (Pass true for the flag to force check
2595 * the entire Buffer storage.) */
2596 void msan_check_mem_is_initialized(bool entire = false) const {
2597#if defined(__has_feature)
2598#if __has_feature(memory_sanitizer)
2599 if (entire) {
2600 __msan_check_mem_is_initialized(data(), size_in_bytes());
2601 } else {
2602 for_each_value([](T &v) { __msan_check_mem_is_initialized(&v, sizeof(T)); });
2603 }
2604#endif
2605#endif
2606 }
2607};
2608
2609} // namespace Runtime
2610} // namespace Halide
2611
2612#undef HALIDE_ALLOCA
2613
2614 #endif // HALIDE_RUNTIME_BUFFER_H