Halide
HalideBuffer.h
Go to the documentation of this file.
1 /** \file
2  * Defines a Buffer type that wraps from halide_buffer_t and adds
3  * functionality, and methods for more conveniently iterating over the
4  * samples in a halide_buffer_t outside of Halide code. */
5 
6 #ifndef HALIDE_RUNTIME_BUFFER_H
7 #define HALIDE_RUNTIME_BUFFER_H
8 
#include <algorithm>
#include <atomic>
#include <cassert>
#include <limits>
#include <memory>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <type_traits>
#include <utility>
#include <vector>
17 
18 #if defined(__has_feature)
19 #if __has_feature(memory_sanitizer)
20 #include <sanitizer/msan_interface.h>
21 #endif
22 #endif
23 
24 #include "HalideRuntime.h"
25 
26 #ifdef _MSC_VER
27 #include <malloc.h>
28 #define HALIDE_ALLOCA _alloca
29 #else
30 #define HALIDE_ALLOCA __builtin_alloca
31 #endif
32 
33 // gcc 5.1 has a false positive warning on this code
34 #if __GNUC__ == 5 && __GNUC_MINOR__ == 1
35 #pragma GCC diagnostic ignored "-Warray-bounds"
36 #endif
37 
38 namespace Halide {
39 namespace Runtime {
40 
41 // Forward-declare our Buffer class
42 template<typename T, int D>
43 class Buffer;
44 
45 // A helper to check if a parameter pack is entirely implicitly
46 // int-convertible to use with std::enable_if
/** A helper to check whether every type in a parameter pack is
 * implicitly convertible to int, for use with std::enable_if. An
 * empty pack qualifies. */
template<typename... Args>
struct AllInts : std::false_type {};

template<>
struct AllInts<> : std::true_type {};

template<typename T, typename... Args>
struct AllInts<T, Args...>
    : std::integral_constant<bool,
                             std::is_convertible<T, int>::value &&
                                 AllInts<Args...>::value> {};

/** Floats and doubles are technically implicitly int-convertible, but
 * doing so produces a warning we treat as an error, so just disallow
 * it here. */
template<typename... Args>
struct AllInts<float, Args...> : std::false_type {};

template<typename... Args>
struct AllInts<double, Args...> : std::false_type {};
66 
67 // A helper to detect if there are any zeros in a container
68 namespace Internal {
/** Return true if any element of the container (or C array) converts
 * to an int equal to zero. Callers use this to skip allocation when
 * any extent is zero (a bounds-query buffer). */
template<typename Container>
bool any_zero(const Container &c) {
    bool found_zero = false;
    for (int element : c) {
        if (element == 0) {
            found_zero = true;
            break;
        }
    }
    return found_zero;
}
76 } // namespace Internal
77 
78 /** A struct acting as a header for allocations owned by the Buffer
79  * class itself. */
81  void (*deallocate_fn)(void *);
82  std::atomic<int> ref_count;
83 
84  // Note that ref_count always starts at 1
87  }
88 };
89 
/** This indicates how to deallocate the device for a Halide::Runtime::Buffer. */
enum struct BufferDeviceOwnership : int {
    Allocated,               ///< halide_device_free will be called when device ref count goes to zero
    WrappedNative,           ///< halide_device_detach_native will be called when device ref count goes to zero
    Unmanaged,               ///< No free routine will be called when device ref count goes to zero
    AllocatedDeviceAndHost,  ///< Call device_and_host_free when DevRefCount goes to zero.
    Cropped,                 ///< Call halide_device_release_crop when DevRefCount goes to zero.
};
98 
99 /** A similar struct for managing device allocations. */
101  // This is only ever constructed when there's something to manage,
102  // so start at one.
103  std::atomic<int> count{1};
105 };
106 
107 /** A templated Buffer class that wraps halide_buffer_t and adds
108  * functionality. When using Halide from C++, this is the preferred
109  * way to create input and output buffers. The overhead of using this
110  * class relative to a naked halide_buffer_t is minimal - it uses another
111  * ~16 bytes on the stack, and does no dynamic allocations when using
112  * it to represent existing memory of a known maximum dimensionality.
113  *
114  * The template parameter T is the element type. For buffers where the
115  * element type is unknown, or may vary, use void or const void.
116  *
117  * D is the maximum number of dimensions that can be represented using
118  * space inside the class itself. Set it to the maximum dimensionality
119  * you expect this buffer to be. If the actual dimensionality exceeds
120  * this, heap storage is allocated to track the shape of the buffer. D
121  * defaults to 4, which should cover nearly all usage.
122  *
123  * The class optionally allocates and owns memory for the image using
124  * a shared pointer allocated with the provided allocator. If they are
125  * null, malloc and free are used. Any device-side allocation is
126  * considered as owned if and only if the host-side allocation is
127  * owned. */
128 template<typename T = void, int D = 4>
129 class Buffer {
130  /** The underlying halide_buffer_t */
131  halide_buffer_t buf = {0};
132 
133  /** Some in-class storage for shape of the dimensions. */
134  halide_dimension_t shape[D];
135 
136  /** The allocation owned by this Buffer. NULL if the Buffer does not
137  * own the memory. */
138  AllocationHeader *alloc = nullptr;
139 
140  /** A reference count for the device allocation owned by this
141  * buffer. */
142  mutable DeviceRefCount *dev_ref_count = nullptr;
143 
144  /** True if T is of type void or const void */
145  static const bool T_is_void = std::is_same<typename std::remove_const<T>::type, void>::value;
146 
147  /** A type function that adds a const qualifier if T is a const type. */
148  template<typename T2>
149  using add_const_if_T_is_const = typename std::conditional<std::is_const<T>::value, const T2, T2>::type;
150 
151  /** T unless T is (const) void, in which case (const)
152  * uint8_t. Useful for providing return types for operator() */
153  using not_void_T = typename std::conditional<T_is_void,
154  add_const_if_T_is_const<uint8_t>,
155  T>::type;
156 
157  /** T with constness removed. Useful for return type of copy(). */
158  using not_const_T = typename std::remove_const<T>::type;
159 
160  /** The type the elements are stored as. Equal to not_void_T
161  * unless T is a pointer, in which case uint64_t. Halide stores
162  * all pointer types as uint64s internally, even on 32-bit
163  * systems. */
164  using storage_T = typename std::conditional<std::is_pointer<T>::value, uint64_t, not_void_T>::type;
165 
166 public:
167  /** True if the Halide type is not void (or const void). */
168  static constexpr bool has_static_halide_type = !T_is_void;
169 
170  /** Get the Halide type of T. Callers should not use the result if
171  * has_static_halide_type is false. */
173  return halide_type_of<typename std::remove_cv<not_void_T>::type>();
174  }
175 
    /** Does this Buffer own the host memory it refers to? True only
     * when this Buffer made the allocation itself (alloc is set by
     * allocate() and cleared by decref()). */
    bool owns_host_memory() const {
        return alloc != nullptr;
    }
180 
181 private:
    /** Increment the reference count of any owned allocation. Bumps
     * both the host-side count (if we own the host memory) and the
     * device-side count (if there is a device allocation). */
    void incref() const {
        if (owns_host_memory()) {
            alloc->ref_count++;
        }
        if (buf.device) {
            if (!dev_ref_count) {
                // I seem to have a non-zero dev field but no
                // reference count for it. I must have been given a
                // device allocation by a Halide pipeline, and have
                // never been copied from since. Take sole ownership
                // of it.
                dev_ref_count = new DeviceRefCount;
            }
            dev_ref_count->count++;
        }
    }
199 
200  // Note that this is called "cropped" but can also encompass a slice/embed
201  // operation as well.
202  struct DevRefCountCropped : DeviceRefCount {
203  Buffer<T, D> cropped_from;
204  DevRefCountCropped(const Buffer<T, D> &cropped_from)
205  : cropped_from(cropped_from) {
207  }
208  };
209 
    /** Setup the device ref count for a buffer to indicate it is a crop
     * (or slice, embed, etc) of cropped_from. Must only be called before
     * any device ref count exists; the cropped ref count also keeps the
     * parent buffer alive for as long as the crop lives. */
    void crop_from(const Buffer<T, D> &cropped_from) {
        assert(dev_ref_count == nullptr);
        dev_ref_count = new DevRefCountCropped(cropped_from);
    }
215 
    /** Decrement the reference count of any owned allocation and free host
     * and device memory if it hits zero. Sets alloc to nullptr. */
    void decref() {
        if (owns_host_memory()) {
            int new_count = --(alloc->ref_count);
            if (new_count == 0) {
                // Save the deallocation function before destroying the
                // header that stores it, then release the raw storage.
                void (*fn)(void *) = alloc->deallocate_fn;
                alloc->~AllocationHeader();
                fn(alloc);
            }
            // This Buffer no longer references the host allocation,
            // even if other Buffers still do.
            buf.host = nullptr;
            alloc = nullptr;
            set_host_dirty(false);
        }
        decref_dev();
    }

    /** Decrement the device-side reference count, and release the
     * device allocation (via the route appropriate to its ownership
     * policy) when it reaches zero. Always detaches this Buffer from
     * the device allocation. */
    void decref_dev() {
        int new_count = 0;
        if (dev_ref_count) {
            new_count = --(dev_ref_count->count);
        }
        if (new_count == 0) {
            if (buf.device) {
                assert(!(alloc && device_dirty()) &&
                       "Implicitly freeing a dirty device allocation while a host allocation still lives. "
                       "Call device_free explicitly if you want to drop dirty device-side data. "
                       "Call copy_to_host explicitly if you want the data copied to the host allocation "
                       "before the device allocation is freed.");
                // Dispatch to the free routine matching how the device
                // memory was acquired (see BufferDeviceOwnership).
                if (dev_ref_count && dev_ref_count->ownership == BufferDeviceOwnership::WrappedNative) {
                    buf.device_interface->detach_native(nullptr, &buf);
                } else if (dev_ref_count && dev_ref_count->ownership == BufferDeviceOwnership::AllocatedDeviceAndHost) {
                    buf.device_interface->device_and_host_free(nullptr, &buf);
                } else if (dev_ref_count && dev_ref_count->ownership == BufferDeviceOwnership::Cropped) {
                    buf.device_interface->device_release_crop(nullptr, &buf);
                } else if (dev_ref_count == nullptr || dev_ref_count->ownership == BufferDeviceOwnership::Allocated) {
                    buf.device_interface->device_free(nullptr, &buf);
                }
            }
            if (dev_ref_count) {
                if (dev_ref_count->ownership == BufferDeviceOwnership::Cropped) {
                    // Delete via the derived type so the cropped_from
                    // Buffer member is destroyed as well.
                    delete (DevRefCountCropped *)dev_ref_count;
                } else {
                    delete dev_ref_count;
                }
            }
        }
        buf.device = 0;
        buf.device_interface = nullptr;
        dev_ref_count = nullptr;
    }
267 
268  void free_shape_storage() {
269  if (buf.dim != shape) {
270  delete[] buf.dim;
271  buf.dim = nullptr;
272  }
273  }
274 
275  void make_shape_storage(const int dimensions) {
276  // This should usually be inlined, so if dimensions is statically known,
277  // we can skip the call to new
278  buf.dimensions = dimensions;
279  buf.dim = (dimensions <= D) ? shape : new halide_dimension_t[dimensions];
280  }
281 
    /** Copy another halide_buffer_t's shape into freshly made shape
     * storage for this buffer. */
    void copy_shape_from(const halide_buffer_t &other) {
        // All callers of this ensure that buf.dimensions == other.dimensions.
        make_shape_storage(other.dimensions);
        std::copy(other.dim, other.dim + other.dimensions, buf.dim);
    }

    /** Take over another buffer's shape storage. When the other buffer
     * uses its in-class shape array we must copy (that storage lives
     * inside 'other'); a heap-allocated shape array is adopted directly. */
    template<typename T2, int D2>
    void move_shape_from(Buffer<T2, D2> &&other) {
        if (other.shape == other.buf.dim) {
            copy_shape_from(other.buf);
        } else {
            buf.dim = other.buf.dim;
            other.buf.dim = nullptr;
        }
    }

    /** Initialize the shape from a halide_buffer_t. If the source has a
     * device allocation, start tracking it with the given ownership policy. */
    void initialize_from_buffer(const halide_buffer_t &b,
                                BufferDeviceOwnership ownership) {
        memcpy(&buf, &b, sizeof(halide_buffer_t));
        copy_shape_from(b);
        if (b.device) {
            dev_ref_count = new DeviceRefCount;
            dev_ref_count->ownership = ownership;
        }
    }
308 
309  /** Initialize the shape from an array of ints */
310  void initialize_shape(const int *sizes) {
311  for (int i = 0; i < buf.dimensions; i++) {
312  buf.dim[i].min = 0;
313  buf.dim[i].extent = sizes[i];
314  if (i == 0) {
315  buf.dim[i].stride = 1;
316  } else {
317  buf.dim[i].stride = buf.dim[i - 1].stride * buf.dim[i - 1].extent;
318  }
319  }
320  }
321 
    /** Initialize the shape from a vector of extents. The buffer's
     * dimensionality must already match the vector's length. */
    void initialize_shape(const std::vector<int> &sizes) {
        assert(buf.dimensions == (int)sizes.size());
        initialize_shape(sizes.data());
    }
327 
    /** Initialize the shape from the static shape of a (possibly
     * nested) C array. Recurses from the outermost dimension ('next')
     * down to dimension zero; strides follow dense row-major packing. */
    template<typename Array, size_t N>
    void initialize_shape_from_array_shape(int next, Array (&vals)[N]) {
        buf.dim[next].min = 0;
        buf.dim[next].extent = (int)N;
        if (next == 0) {
            buf.dim[next].stride = 1;
        } else {
            // Fill in the inner dimensions first so their strides and
            // extents are available to compute this one's stride.
            initialize_shape_from_array_shape(next - 1, vals[0]);
            buf.dim[next].stride = buf.dim[next - 1].stride * buf.dim[next - 1].extent;
        }
    }

    /** Base case for the template recursion above: a non-array element
     * type contributes no further dimensions. */
    template<typename T2>
    void initialize_shape_from_array_shape(int, const T2 &) {
    }

    /** Get the dimensionality of a multi-dimensional C array by
     * counting levels of array nesting. */
    template<typename Array, size_t N>
    static int dimensionality_of_array(Array (&vals)[N]) {
        return dimensionality_of_array(vals[0]) + 1;
    }

    /** Base case: a non-array type has dimensionality zero. */
    template<typename T2>
    static int dimensionality_of_array(const T2 &) {
        return 0;
    }

    /** Get the underlying halide_type_t of an array's element type by
     * recursing to the innermost non-array type. */
    template<typename Array, size_t N>
    static halide_type_t scalar_type_of_array(Array (&vals)[N]) {
        return scalar_type_of_array(vals[0]);
    }

    /** Base case: the scalar element type itself. */
    template<typename T2>
    static halide_type_t scalar_type_of_array(const T2 &) {
        return halide_type_of<typename std::remove_cv<T2>::type>();
    }
367 
    /** Crop a single dimension without handling device allocation.
     * The new region must lie within the existing one. */
    void crop_host(int d, int min, int extent) {
        assert(dim(d).min() <= min);
        assert(dim(d).max() >= min + extent - 1);
        int shift = min - dim(d).min();
        if (buf.host != nullptr) {
            // Advance the host pointer so coordinate 'min' still
            // addresses the same memory it did before the crop.
            buf.host += shift * dim(d).stride() * type().bytes();
        }
        buf.dim[d].min = min;
        buf.dim[d].extent = extent;
    }

    /** Crop as many dimensions as are in rect (as (min, extent) pairs,
     * starting from dimension zero), without handling device allocation. */
    void crop_host(const std::vector<std::pair<int, int>> &rect) {
        assert(rect.size() <= static_cast<decltype(rect.size())>(std::numeric_limits<int>::max()));
        int limit = (int)rect.size();
        assert(limit <= dimensions());
        for (int i = 0; i < limit; i++) {
            crop_host(i, rect[i].first, rect[i].second);
        }
    }
389 
    /** Finish a crop by asking the device interface to crop the device
     * allocation to match, and recording which buffer the result was
     * cropped from (so the parent stays alive). */
    void complete_device_crop(Buffer<T, D> &result_host_cropped) const {
        assert(buf.device_interface != nullptr);
        if (buf.device_interface->device_crop(nullptr, &this->buf, &result_host_cropped.buf) == 0) {
            const Buffer<T, D> *cropped_from = this;
            // TODO: Figure out what to do if dev_ref_count is nullptr. Should incref logic run here?
            // is it possible to get to this point without incref having run at least once since
            // the device field was set? (I.e. in the internal logic of crop. incref might have been
            // called.)
            if (dev_ref_count != nullptr && dev_ref_count->ownership == BufferDeviceOwnership::Cropped) {
                // Crops of crops record the original parent, not the
                // intermediate crop.
                cropped_from = &((DevRefCountCropped *)dev_ref_count)->cropped_from;
            }
            result_host_cropped.crop_from(*cropped_from);
        }
    }

    /** slice a single dimension without handling device allocation. */
    void slice_host(int d, int pos) {
        assert(d >= 0 && d < dimensions());
        assert(pos >= dim(d).min() && pos <= dim(d).max());
        buf.dimensions--;
        int shift = pos - buf.dim[d].min;
        if (buf.host != nullptr) {
            // Advance the host pointer to the chosen position in the
            // sliced-away dimension.
            buf.host += shift * buf.dim[d].stride * type().bytes();
        }
        // Shuffle the higher dimensions down over the removed one.
        for (int i = d; i < buf.dimensions; i++) {
            buf.dim[i] = buf.dim[i + 1];
        }
        buf.dim[buf.dimensions] = {0, 0, 0};
    }

    /** Finish a slice by asking the device interface to slice the
     * device allocation to match, and recording the parent buffer. */
    void complete_device_slice(Buffer<T, D> &result_host_sliced, int d, int pos) const {
        assert(buf.device_interface != nullptr);
        if (buf.device_interface->device_slice(nullptr, &this->buf, d, pos, &result_host_sliced.buf) == 0) {
            const Buffer<T, D> *sliced_from = this;
            // TODO: Figure out what to do if dev_ref_count is nullptr. Should incref logic run here?
            // is it possible to get to this point without incref having run at least once since
            // the device field was set? (I.e. in the internal logic of slice. incref might have been
            // called.)
            if (dev_ref_count != nullptr && dev_ref_count->ownership == BufferDeviceOwnership::Cropped) {
                sliced_from = &((DevRefCountCropped *)dev_ref_count)->cropped_from;
            }
            // crop_from() is correct here, despite the fact that we are slicing.
            result_host_sliced.crop_from(*sliced_from);
        }
    }
435 
436 public:
437  typedef T ElemType;
438 
439  /** Read-only access to the shape */
440  class Dimension {
441  const halide_dimension_t &d;
442 
443  public:
444  /** The lowest coordinate in this dimension */
445  HALIDE_ALWAYS_INLINE int min() const {
446  return d.min;
447  }
448 
449  /** The number of elements in memory you have to step over to
450  * increment this coordinate by one. */
452  return d.stride;
453  }
454 
455  /** The extent of the image along this dimension */
457  return d.extent;
458  }
459 
460  /** The highest coordinate in this dimension */
461  HALIDE_ALWAYS_INLINE int max() const {
462  return min() + extent() - 1;
463  }
464 
465  /** An iterator class, so that you can iterate over
466  * coordinates in a dimensions using a range-based for loop. */
467  struct iterator {
468  int val;
469  int operator*() const {
470  return val;
471  }
472  bool operator!=(const iterator &other) const {
473  return val != other.val;
474  }
476  val++;
477  return *this;
478  }
479  };
480 
481  /** An iterator that points to the min coordinate */
483  return {min()};
484  }
485 
486  /** An iterator that points to one past the max coordinate */
488  return {min() + extent()};
489  }
490 
492  : d(dim){};
493  };
494 
    /** Access the shape of the buffer. Asserts the index is within the
     * buffer's dimensionality. */
    HALIDE_ALWAYS_INLINE Dimension dim(int i) const {
        assert(i >= 0 && i < this->dimensions());
        return Dimension(buf.dim[i]);
    }

    /** Access to the mins, strides, extents. Will be deprecated. Do not use.
     * Prefer dim(i).min() / dim(i).extent() / dim(i).stride(). */
    // @{
    int min(int i) const {
        return dim(i).min();
    }
    int extent(int i) const {
        return dim(i).extent();
    }
    int stride(int i) const {
        return dim(i).stride();
    }
    // @}
513 
514  /** The total number of elements this buffer represents. Equal to
515  * the product of the extents */
516  size_t number_of_elements() const {
517  size_t s = 1;
518  for (int i = 0; i < dimensions(); i++) {
519  s *= dim(i).extent();
520  }
521  return s;
522  }
523 
    /** Get the dimensionality of the buffer (the number of entries in
     * buf.dim). */
    int dimensions() const {
        return buf.dimensions;
    }

    /** Get the runtime type of the elements. */
    halide_type_t type() const {
        return buf.type;
    }
533 
534 private:
    /** Offset to the element with the lowest address. If all
     * strides are positive, equal to zero. Offset is in elements, not bytes. */
    ptrdiff_t begin_offset() const {
        ptrdiff_t index = 0;
        // Only negative strides move the lowest address below the
        // host pointer.
        for (int i = 0; i < dimensions(); i++) {
            if (dim(i).stride() < 0) {
                index += dim(i).stride() * (dim(i).extent() - 1);
            }
        }
        return index;
    }

    /** An offset to one beyond the element with the highest address.
     * Offset is in elements, not bytes. */
    ptrdiff_t end_offset() const {
        ptrdiff_t index = 0;
        // Only positive strides move the highest address above the
        // host pointer.
        for (int i = 0; i < dimensions(); i++) {
            if (dim(i).stride() > 0) {
                index += dim(i).stride() * (dim(i).extent() - 1);
            }
        }
        index += 1;
        return index;
    }
559 
560 public:
    /** A pointer to the element with the lowest address. If all
     * strides are positive, equal to the host pointer. */
    T *begin() const {
        assert(buf.host != nullptr);  // Cannot call begin() on an unallocated Buffer.
        return (T *)(buf.host + begin_offset() * type().bytes());
    }

    /** A pointer to one beyond the element with the highest address. */
    T *end() const {
        assert(buf.host != nullptr);  // Cannot call end() on an unallocated Buffer.
        return (T *)(buf.host + end_offset() * type().bytes());
    }

    /** The total number of bytes spanned by the data in memory. */
    size_t size_in_bytes() const {
        return (size_t)(end_offset() - begin_offset()) * type().bytes();
    }

    /** Reset the Buffer to be equivalent to a default-constructed Buffer
     * of the same static type (if any); Buffer<void> will have its runtime
     * type reset to uint8. Releases any owned memory via the assignment's
     * decref. */
    void reset() {
        *this = Buffer();
    }
585 
587  : shape() {
588  buf.type = static_halide_type();
589  make_shape_storage(0);
590  }
591 
592  /** Make a Buffer from a halide_buffer_t */
593  explicit Buffer(const halide_buffer_t &buf,
595  assert(T_is_void || buf.type == static_halide_type());
596  initialize_from_buffer(buf, ownership);
597  }
598 
599  /** Give Buffers access to the members of Buffers of different dimensionalities and types. */
600  template<typename T2, int D2>
601  friend class Buffer;
602 
603 private:
    /** Compile-time check that a Buffer<T2, D2> is convertible to this
     * Buffer type: const-ness may only be added, and element types must
     * match unless one side is (const) void. */
    template<typename T2, int D2>
    static void static_assert_can_convert_from() {
        static_assert((!std::is_const<T2>::value || std::is_const<T>::value),
                      "Can't convert from a Buffer<const T> to a Buffer<T>");
        static_assert(std::is_same<typename std::remove_const<T>::type,
                                   typename std::remove_const<T2>::type>::value ||
                          T_is_void || Buffer<T2, D2>::T_is_void,
                      "type mismatch constructing Buffer");
    }
613 
614 public:
    /** Determine if a Buffer<T, D> can be constructed from some other Buffer type.
     * If this can be determined at compile time, fail with a static assert; otherwise
     * return a boolean based on runtime typing. */
    template<typename T2, int D2>
    static bool can_convert_from(const Buffer<T2, D2> &other) {
        static_assert_can_convert_from<T2, D2>();
        if (Buffer<T2, D2>::T_is_void && !T_is_void) {
            // A type-erased source is convertible only if its runtime
            // type matches our static type.
            return other.type() == static_halide_type();
        }
        return true;
    }

    /** Fail an assertion at runtime or compile-time if a Buffer<T, D>
     * cannot be constructed from some other Buffer type. */
    template<typename T2, int D2>
    static void assert_can_convert_from(const Buffer<T2, D2> &other) {
        // Explicitly call static_assert_can_convert_from() here so
        // that we always get compile-time checking, even if compiling with
        // assertions disabled.
        static_assert_can_convert_from<T2, D2>();
        assert(can_convert_from(other));
    }
637 
    /** Copy constructor. Does not copy underlying data; the new Buffer
     * shares the host and device allocations (reference counted) and
     * takes a private copy of the shape. */
    Buffer(const Buffer<T, D> &other)
        : buf(other.buf),
          alloc(other.alloc) {
        other.incref();
        dev_ref_count = other.dev_ref_count;
        copy_shape_from(other.buf);
    }
646 
647  /** Construct a Buffer from a Buffer of different dimensionality
648  * and type. Asserts that the type matches (at runtime, if one of
649  * the types is void). Note that this constructor is
650  * implicit. This, for example, lets you pass things like
651  * Buffer<T> or Buffer<const void> to functions expected
652  * Buffer<const T>. */
653  template<typename T2, int D2>
654  Buffer(const Buffer<T2, D2> &other)
655  : buf(other.buf),
656  alloc(other.alloc) {
658  other.incref();
659  dev_ref_count = other.dev_ref_count;
660  copy_shape_from(other.buf);
661  }
662 
    /** Move constructor. Steals the other buffer's allocations and
     * shape; the source is left in a default-constructed state so its
     * destructor releases nothing. */
    Buffer(Buffer<T, D> &&other) noexcept
        : buf(other.buf),
          alloc(other.alloc),
          dev_ref_count(other.dev_ref_count) {
        other.dev_ref_count = nullptr;
        other.alloc = nullptr;
        move_shape_from(std::forward<Buffer<T, D>>(other));
        other.buf = halide_buffer_t();
    }
673 
674  /** Move-construct a Buffer from a Buffer of different
675  * dimensionality and type. Asserts that the types match (at
676  * runtime if one of the types is void). */
677  template<typename T2, int D2>
679  : buf(other.buf),
680  alloc(other.alloc),
681  dev_ref_count(other.dev_ref_count) {
683  other.dev_ref_count = nullptr;
684  other.alloc = nullptr;
685  move_shape_from(std::forward<Buffer<T2, D2>>(other));
686  other.buf = halide_buffer_t();
687  }
688 
689  /** Assign from another Buffer of possibly-different
690  * dimensionality and type. Asserts that the types match (at
691  * runtime if one of the types is void). */
692  template<typename T2, int D2>
694  if ((const void *)this == (const void *)&other) {
695  return *this;
696  }
698  other.incref();
699  decref();
700  dev_ref_count = other.dev_ref_count;
701  alloc = other.alloc;
702  free_shape_storage();
703  buf = other.buf;
704  copy_shape_from(other.buf);
705  return *this;
706  }
707 
708  /** Standard assignment operator */
710  if (this == &other) {
711  return *this;
712  }
713  other.incref();
714  decref();
715  dev_ref_count = other.dev_ref_count;
716  alloc = other.alloc;
717  free_shape_storage();
718  buf = other.buf;
719  copy_shape_from(other.buf);
720  return *this;
721  }
722 
723  /** Move from another Buffer of possibly-different
724  * dimensionality and type. Asserts that the types match (at
725  * runtime if one of the types is void). */
726  template<typename T2, int D2>
729  decref();
730  alloc = other.alloc;
731  other.alloc = nullptr;
732  dev_ref_count = other.dev_ref_count;
733  other.dev_ref_count = nullptr;
734  free_shape_storage();
735  buf = other.buf;
736  move_shape_from(std::forward<Buffer<T2, D2>>(other));
737  other.buf = halide_buffer_t();
738  return *this;
739  }
740 
    /** Standard move-assignment operator. Drops our current references,
     * then steals the other buffer's allocations and shape, leaving it
     * in a default-constructed state. */
    Buffer<T, D> &operator=(Buffer<T, D> &&other) noexcept {
        decref();
        alloc = other.alloc;
        other.alloc = nullptr;
        dev_ref_count = other.dev_ref_count;
        other.dev_ref_count = nullptr;
        free_shape_storage();
        buf = other.buf;
        move_shape_from(std::forward<Buffer<T, D>>(other));
        other.buf = halide_buffer_t();
        return *this;
    }
754 
    /** Check the product of the extents fits in memory. Multiplies the
     * extents into a size_t, then divides them back out; the result
     * recovers the element size only if no step overflowed. */
    void check_overflow() {
        size_t size = type().bytes();
        for (int i = 0; i < dimensions(); i++) {
            size *= dim(i).extent();
        }
        // We allow 2^31 or 2^63 bytes, so drop the top bit.
        size = (size << 1) >> 1;
        for (int i = 0; i < dimensions(); i++) {
            size /= dim(i).extent();
        }
        assert(size == (size_t)type().bytes() && "Error: Overflow computing total size of buffer.");
    }
768 
    /** Allocate memory for this Buffer. Drops the reference to any
     * owned memory. Uses the given allocation/deallocation functions,
     * defaulting to malloc/free. The AllocationHeader is placement-new'd
     * at the front of the raw allocation, and the host pointer is the
     * first aligned address after it. */
    void allocate(void *(*allocate_fn)(size_t) = nullptr,
                  void (*deallocate_fn)(void *) = nullptr) {
        if (!allocate_fn) {
            allocate_fn = malloc;
        }
        if (!deallocate_fn) {
            deallocate_fn = free;
        }

        // Drop any existing allocation
        deallocate();

        // Conservatively align images to 128 bytes. This is enough
        // alignment for all the platforms we might use.
        size_t size = size_in_bytes();
        const size_t alignment = 128;
        size = (size + alignment - 1) & ~(alignment - 1);
        void *alloc_storage = allocate_fn(size + sizeof(AllocationHeader) + alignment - 1);
        alloc = new (alloc_storage) AllocationHeader(deallocate_fn);
        uint8_t *unaligned_ptr = ((uint8_t *)alloc) + sizeof(AllocationHeader);
        buf.host = (uint8_t *)((uintptr_t)(unaligned_ptr + alignment - 1) & ~(alignment - 1));
    }
793 
    /** Drop reference to any owned host or device memory, possibly
     * freeing it, if this buffer held the last reference to
     * it. Retains the shape of the buffer. Does nothing if this
     * buffer did not allocate its own memory. */
    void deallocate() {
        decref();
    }
801 
802  /** Drop reference to any owned device memory, possibly freeing it
803  * if this buffer held the last reference to it. Asserts that
804  * device_dirty is false. */
806  decref_dev();
807  }
808 
    /** Allocate a new image of the given size with a runtime
     * type. Only used when you do know what size you want but you
     * don't know statically what type the elements are. Pass zeroes
     * to make a buffer suitable for bounds query calls. */
    template<typename... Args,
             typename = typename std::enable_if<AllInts<Args...>::value>::type>
    Buffer(halide_type_t t, int first, Args... rest) {
        if (!T_is_void) {
            // A statically-typed Buffer must agree with the runtime type.
            assert(static_halide_type() == t);
        }
        int extents[] = {first, (int)rest...};
        buf.type = t;
        constexpr int buf_dimensions = 1 + (int)(sizeof...(rest));
        make_shape_storage(buf_dimensions);
        initialize_shape(extents);
        // A zero extent means a bounds-query buffer: shape only, no storage.
        if (!Internal::any_zero(extents)) {
            check_overflow();
            allocate();
        }
    }
829 
    /** Allocate a new image of the given size. Pass zeroes to make a
     * buffer suitable for bounds query calls. */
    // @{

    // The overload with one argument is 'explicit', so that
    // (say) int is not implicitly convertable to Buffer<int>
    explicit Buffer(int first) {
        static_assert(!T_is_void,
                      "To construct an Buffer<void>, pass a halide_type_t as the first argument to the constructor");
        int extents[] = {first};
        buf.type = static_halide_type();
        constexpr int buf_dimensions = 1;
        make_shape_storage(buf_dimensions);
        initialize_shape(extents);
        // A zero extent means a bounds-query buffer: shape only, no storage.
        if (first != 0) {
            check_overflow();
            allocate();
        }
    }
849 
    /** Allocate a new image from two or more extents. Pass zeroes to
     * make a buffer suitable for bounds query calls. */
    template<typename... Args,
             typename = typename std::enable_if<AllInts<Args...>::value>::type>
    Buffer(int first, int second, Args... rest) {
        static_assert(!T_is_void,
                      "To construct an Buffer<void>, pass a halide_type_t as the first argument to the constructor");
        int extents[] = {first, second, (int)rest...};
        buf.type = static_halide_type();
        constexpr int buf_dimensions = 2 + (int)(sizeof...(rest));
        make_shape_storage(buf_dimensions);
        initialize_shape(extents);
        if (!Internal::any_zero(extents)) {
            check_overflow();
            allocate();
        }
    }
    // @}

    /** Allocate a new image of unknown type using a vector of ints as the size. */
    Buffer(halide_type_t t, const std::vector<int> &sizes) {
        if (!T_is_void) {
            // A statically-typed Buffer must agree with the runtime type.
            assert(static_halide_type() == t);
        }
        buf.type = t;
        make_shape_storage((int)sizes.size());
        initialize_shape(sizes);
        if (!Internal::any_zero(sizes)) {
            check_overflow();
            allocate();
        }
    }

    /** Allocate a new image of known type using a vector of ints as the size. */
    explicit Buffer(const std::vector<int> &sizes)
        : Buffer(static_halide_type(), sizes) {
    }
885 
886 private:
887  // Create a copy of the sizes vector, ordered as specified by order.
888  static std::vector<int> make_ordered_sizes(const std::vector<int> &sizes, const std::vector<int> &order) {
889  assert(order.size() == sizes.size());
890  std::vector<int> ordered_sizes(sizes.size());
891  for (size_t i = 0; i < sizes.size(); ++i) {
892  ordered_sizes[i] = sizes.at(order[i]);
893  }
894  return ordered_sizes;
895  }
896 
897 public:
    /** Allocate a new image of unknown type using a vector of ints as the size and
     * a vector of indices indicating the storage order for each dimension. The
     * length of the sizes vector and the storage-order vector must match. For instance,
     * to allocate an interleaved RGB buffer, you would pass {2, 0, 1} for storage_order. */
    Buffer(halide_type_t t, const std::vector<int> &sizes, const std::vector<int> &storage_order)
        : Buffer(t, make_ordered_sizes(sizes, storage_order)) {
        // Allocate with the dimensions in storage order, then transpose
        // back so the logical dimension order matches 'sizes'.
        transpose(storage_order);
    }

    /** As above, but with the element type known statically. */
    Buffer(const std::vector<int> &sizes, const std::vector<int> &storage_order)
        : Buffer(static_halide_type(), sizes, storage_order) {
    }

    /** Make a Buffer that refers to a statically sized array. Does not
     * take ownership of the data, and does not set the host_dirty flag. */
    template<typename Array, size_t N>
    explicit Buffer(Array (&vals)[N]) {
        const int buf_dimensions = dimensionality_of_array(vals);
        buf.type = scalar_type_of_array(vals);
        buf.host = (uint8_t *)vals;
        make_shape_storage(buf_dimensions);
        initialize_shape_from_array_shape(buf.dimensions - 1, vals);
    }
921 
    /** Initialize a Buffer of runtime type from a pointer and some
     * sizes. Assumes dense row-major packing and a min coordinate of
     * zero. Does not take ownership of the data and does not set the
     * host_dirty flag. */
    template<typename... Args,
             typename = typename std::enable_if<AllInts<Args...>::value>::type>
    explicit Buffer(halide_type_t t, add_const_if_T_is_const<void> *data, int first, Args &&... rest) {
        if (!T_is_void) {
            // A statically-typed Buffer must agree with the runtime type.
            assert(static_halide_type() == t);
        }
        int extents[] = {first, (int)rest...};
        buf.type = t;
        constexpr int buf_dimensions = 1 + (int)(sizeof...(rest));
        // const_cast is safe: a const T parameterization never exposes
        // mutable access to the pointed-to data.
        buf.host = (uint8_t *)const_cast<void *>(data);
        make_shape_storage(buf_dimensions);
        initialize_shape(extents);
    }

    /** Initialize a Buffer from a pointer and some sizes. Assumes
     * dense row-major packing and a min coordinate of zero. Does not
     * take ownership of the data and does not set the host_dirty flag. */
    template<typename... Args,
             typename = typename std::enable_if<AllInts<Args...>::value>::type>
    explicit Buffer(T *data, int first, Args &&... rest) {
        int extents[] = {first, (int)rest...};
        buf.type = static_halide_type();
        constexpr int buf_dimensions = 1 + (int)(sizeof...(rest));
        buf.host = (uint8_t *)const_cast<typename std::remove_const<T>::type *>(data);
        make_shape_storage(buf_dimensions);
        initialize_shape(extents);
    }

    /** Initialize a Buffer from a pointer and a vector of
     * sizes. Assumes dense row-major packing and a min coordinate of
     * zero. Does not take ownership of the data and does not set the
     * host_dirty flag. */
    explicit Buffer(T *data, const std::vector<int> &sizes) {
        buf.type = static_halide_type();
        buf.host = (uint8_t *)const_cast<typename std::remove_const<T>::type *>(data);
        make_shape_storage((int)sizes.size());
        initialize_shape(sizes);
    }
964 
965  /** Initialize an Buffer of runtime type from a pointer and a
966  * vector of sizes. Assumes dense row-major packing and a min
967  * coordinate of zero. Does not take ownership of the data and
968  * does not set the host_dirty flag. */
969  explicit Buffer(halide_type_t t, add_const_if_T_is_const<void> *data, const std::vector<int> &sizes) {
970  if (!T_is_void) {
971  assert(static_halide_type() == t);
972  }
973  buf.type = t;
974  buf.host = (uint8_t *)const_cast<void *>(data);
975  make_shape_storage((int)sizes.size());
976  initialize_shape(sizes);
977  }
978 
979  /** Initialize an Buffer from a pointer to the min coordinate and
980  * an array describing the shape. Does not take ownership of the
981  * data, and does not set the host_dirty flag. */
982  explicit Buffer(halide_type_t t, add_const_if_T_is_const<void> *data, int d, const halide_dimension_t *shape) {
983  if (!T_is_void) {
984  assert(static_halide_type() == t);
985  }
986  buf.type = t;
987  buf.host = (uint8_t *)const_cast<void *>(data);
988  make_shape_storage(d);
989  for (int i = 0; i < d; i++) {
990  buf.dim[i] = shape[i];
991  }
992  }
993 
    /** Initialize a Buffer from a pointer to the min coordinate and
     * a vector describing the shape. Does not take ownership of the
     * data, and does not set the host_dirty flag. */
    explicit inline Buffer(halide_type_t t, add_const_if_T_is_const<void> *data,
                           const std::vector<halide_dimension_t> &shape)
        : Buffer(t, data, (int)shape.size(), shape.data()) {
        // Delegates to the (pointer, count) overload above.
    }
1001 
1002  /** Initialize an Buffer from a pointer to the min coordinate and
1003  * an array describing the shape. Does not take ownership of the
1004  * data and does not set the host_dirty flag. */
1005  explicit Buffer(T *data, int d, const halide_dimension_t *shape) {
1006  buf.type = static_halide_type();
1007  buf.host = (uint8_t *)const_cast<typename std::remove_const<T>::type *>(data);
1008  make_shape_storage(d);
1009  for (int i = 0; i < d; i++) {
1010  buf.dim[i] = shape[i];
1011  }
1012  }
1013 
    /** Initialize a Buffer from a pointer to the min coordinate and
     * a vector describing the shape. Does not take ownership of the
     * data, and does not set the host_dirty flag. */
    explicit inline Buffer(T *data, const std::vector<halide_dimension_t> &shape)
        : Buffer(data, (int)shape.size(), shape.data()) {
        // Delegates to the (pointer, count) overload above.
    }
1020 
    /** Destructor. Will release any underlying owned allocation if
     * this is the last reference to it. Will assert fail if there are
     * weak references to this Buffer outstanding. */
        // Release the heap-allocated shape (if any), then drop our
        // reference to the shared allocation.
        free_shape_storage();
        decref();
    }

    /** Get a pointer to the raw halide_buffer_t this wraps. */
    // @{
        return &buf;
    }

    const halide_buffer_t *raw_buffer() const {
        return &buf;
    }
    // @}

    /** Provide a cast operator to halide_buffer_t *, so that
     * instances can be passed directly to Halide filters. */
    operator halide_buffer_t *() {
        return &buf;
    }

    /** Return a typed reference to this Buffer. Useful for converting
     * a reference to a Buffer<void> to a reference to, for example, a
     * Buffer<const uint8_t>, or converting a Buffer<T>& to Buffer<const T>&.
     * Does a runtime assert if the source buffer type is void. */
    template<typename T2, int D2 = D,
             typename = typename std::enable_if<(D2 <= D)>::type>
    Buffer<T2, D2> &
    as() & {
        // Reinterpret-style cast: all Buffer<T, D> instantiations share layout.
        return *((Buffer<T2, D2> *)this);
    }

    /** Return a const typed reference to this Buffer. Useful for
     * converting a const reference to one Buffer type to a const
     * reference to another Buffer type. Does a runtime assert if the
     * source buffer type is void. */
    template<typename T2, int D2 = D,
             typename = typename std::enable_if<(D2 <= D)>::type>
        return *((const Buffer<T2, D2> *)this);
    }

    /** Returns this rval Buffer with a different type attached. Does
     * a dynamic type check if the source type is void. */
    template<typename T2, int D2 = D>
    as() && {
        return *((Buffer<T2, D2> *)this);
    }

    /** as_const() is syntactic sugar for .as<const T>(), to avoid the need
     * to recapitulate the type argument. */
    // @{
        // Note that we can skip the assert_can_convert_from(), since T -> const T
        // conversion is always legal.
        return *((Buffer<typename std::add_const<T>::type> *)this);
    }

        return *((const Buffer<typename std::add_const<T>::type> *)this);
    }

        return *((Buffer<typename std::add_const<T>::type> *)this);
    }
    // @}
1100 
1101  /** Conventional names for the first three dimensions. */
1102  // @{
1103  int width() const {
1104  return (dimensions() > 0) ? dim(0).extent() : 1;
1105  }
1106  int height() const {
1107  return (dimensions() > 1) ? dim(1).extent() : 1;
1108  }
1109  int channels() const {
1110  return (dimensions() > 2) ? dim(2).extent() : 1;
1111  }
1112  // @}
1113 
    /** Conventional names for the min and max value of each dimension */
    // @{
    // Min coordinate of dimension 0 (x).
    int left() const {
        return dim(0).min();
    }

    // Max coordinate of dimension 0 (x).
    int right() const {
        return dim(0).max();
    }

    // Min coordinate of dimension 1 (y).
    int top() const {
        return dim(1).min();
    }

    // Max coordinate of dimension 1 (y).
    int bottom() const {
        return dim(1).max();
    }
    // @}
1132 
1133  /** Make a new image which is a deep copy of this image. Use crop
1134  * or slice followed by copy to make a copy of only a portion of
1135  * the image. The new image uses the same memory layout as the
1136  * original, with holes compacted away. Note that the returned
1137  * Buffer is always of a non-const type T (ie:
1138  *
1139  * Buffer<const T>.copy() -> Buffer<T> rather than Buffer<const T>
1140  *
1141  * which is always safe, since we are making a deep copy. (The caller
1142  * can easily cast it back to Buffer<const T> if desired, which is
1143  * always safe and free.)
1144  */
1145  Buffer<not_const_T, D> copy(void *(*allocate_fn)(size_t) = nullptr,
1146  void (*deallocate_fn)(void *) = nullptr) const {
1147  Buffer<not_const_T, D> dst = Buffer<not_const_T, D>::make_with_shape_of(*this, allocate_fn, deallocate_fn);
1148  dst.copy_from(*this);
1149  return dst;
1150  }
1151 
    /** Like copy(), but the copy is created in interleaved memory layout
     * (vs. keeping the same memory layout as the original). Requires that 'this'
     * has exactly 3 dimensions.
     */
    Buffer<not_const_T, D> copy_to_interleaved(void *(*allocate_fn)(size_t) = nullptr,
                                               void (*deallocate_fn)(void *) = nullptr) const {
        assert(dimensions() == 3);
        // Match the source's coordinate system before allocating/copying.
        dst.set_min(min(0), min(1), min(2));
        dst.allocate(allocate_fn, deallocate_fn);
        dst.copy_from(*this);
        return dst;
    }

    /** Like copy(), but the copy is created in planar memory layout
     * (vs. keeping the same memory layout as the original).
     */
    Buffer<not_const_T, D> copy_to_planar(void *(*allocate_fn)(size_t) = nullptr,
                                          void (*deallocate_fn)(void *) = nullptr) const {
        // Gather this buffer's mins and extents so the destination
        // covers the same region.
        std::vector<int> mins, extents;
        const int dims = dimensions();
        mins.reserve(dims);
        extents.reserve(dims);
        for (int d = 0; d < dims; ++d) {
            mins.push_back(dim(d).min());
            extents.push_back(dim(d).extent());
        }
        dst.set_min(mins);
        dst.allocate(allocate_fn, deallocate_fn);
        dst.copy_from(*this);
        return dst;
    }
1185 
    /** Make a copy of the Buffer which shares the underlying host and/or device
     * allocations as the existing Buffer. This is purely syntactic sugar for
     * cases where you have a const reference to a Buffer but need a temporary
     * non-const copy (e.g. to make a call into AOT-generated Halide code), and want a terse
     * inline way to create a temporary. \code
     * void call_my_func(const Buffer<const uint8_t>& input) {
     *     my_func(input.alias(), output);
     * }\endcode
     */
    inline Buffer<T, D> alias() const {
        // The copy constructor makes a shallow copy (shared allocations).
        return *this;
    }
1198 
    /** Fill a Buffer with the values at the same coordinates in
     * another Buffer. Restricts itself to coordinates contained
     * within the intersection of the two buffers. If the two Buffers
     * are not in the same coordinate system, you will need to
     * translate the argument Buffer first. E.g. if you're blitting a
     * sprite onto a framebuffer, you'll want to translate the sprite
     * to the correct location first like so: \code
     * framebuffer.copy_from(sprite.translated({x, y})); \endcode
     */
    template<typename T2, int D2>
    void copy_from(const Buffer<T2, D2> &other) {
        static_assert(!std::is_const<T>::value, "Cannot call copy_from() on a Buffer<const T>");
        assert(!device_dirty() && "Cannot call Halide::Runtime::Buffer::copy_from on a device dirty destination.");
        assert(!other.device_dirty() && "Cannot call Halide::Runtime::Buffer::copy_from on a device dirty source.");

        // Shallow copies we can crop without disturbing the originals.
        Buffer<const T, D> src(other);
        Buffer<T, D> dst(*this);

        assert(src.dimensions() == dst.dimensions());

        // Trim the copy to the region in common
        for (int i = 0; i < dimensions(); i++) {
            int min_coord = std::max(dst.dim(i).min(), src.dim(i).min());
            int max_coord = std::min(dst.dim(i).max(), src.dim(i).max());
            if (max_coord < min_coord) {
                // The buffers do not overlap.
                return;
            }
            dst.crop(i, min_coord, max_coord - min_coord + 1);
            src.crop(i, min_coord, max_coord - min_coord + 1);
        }

        // If T is void, we need to do runtime dispatch to an
        // appropriately-typed lambda. We're copying, so we only care
        // about the element size. (If not, this should optimize away
        // into a static dispatch to the right-sized copy.)
        if (T_is_void ? (type().bytes() == 1) : (sizeof(not_void_T) == 1)) {
            using MemType = uint8_t;
            auto &typed_dst = (Buffer<MemType, D> &)dst;
            auto &typed_src = (Buffer<const MemType, D> &)src;
            typed_dst.for_each_value([&](MemType &dst, MemType src) { dst = src; }, typed_src);
        } else if (T_is_void ? (type().bytes() == 2) : (sizeof(not_void_T) == 2)) {
            using MemType = uint16_t;
            auto &typed_dst = (Buffer<MemType, D> &)dst;
            auto &typed_src = (Buffer<const MemType, D> &)src;
            typed_dst.for_each_value([&](MemType &dst, MemType src) { dst = src; }, typed_src);
        } else if (T_is_void ? (type().bytes() == 4) : (sizeof(not_void_T) == 4)) {
            using MemType = uint32_t;
            auto &typed_dst = (Buffer<MemType, D> &)dst;
            auto &typed_src = (Buffer<const MemType, D> &)src;
            typed_dst.for_each_value([&](MemType &dst, MemType src) { dst = src; }, typed_src);
        } else if (T_is_void ? (type().bytes() == 8) : (sizeof(not_void_T) == 8)) {
            using MemType = uint64_t;
            auto &typed_dst = (Buffer<MemType, D> &)dst;
            auto &typed_src = (Buffer<const MemType, D> &)src;
            typed_dst.for_each_value([&](MemType &dst, MemType src) { dst = src; }, typed_src);
        } else {
            assert(false && "type().bytes() must be 1, 2, 4, or 8");
        }
        // The host-side contents changed; record that for any device copy.
        set_host_dirty();
    }
1260 
1261  /** Make an image that refers to a sub-range of this image along
1262  * the given dimension. Asserts that the crop region is within
1263  * the existing bounds: you cannot "crop outwards", even if you know there
1264  * is valid Buffer storage (e.g. because you already cropped inwards). */
1265  Buffer<T, D> cropped(int d, int min, int extent) const {
1266  // Make a fresh copy of the underlying buffer (but not a fresh
1267  // copy of the allocation, if there is one).
1268  Buffer<T, D> im = *this;
1269 
1270  // This guarantees the prexisting device ref is dropped if the
1271  // device_crop call fails and maintains the buffer in a consistent
1272  // state.
1273  im.device_deallocate();
1274 
1275  im.crop_host(d, min, extent);
1276  if (buf.device_interface != nullptr) {
1277  complete_device_crop(im);
1278  }
1279  return im;
1280  }
1281 
1282  /** Crop an image in-place along the given dimension. This does
1283  * not move any data around in memory - it just changes the min
1284  * and extent of the given dimension. */
1285  void crop(int d, int min, int extent) {
1286  // An optimization for non-device buffers. For the device case,
1287  // a temp buffer is required, so reuse the not-in-place version.
1288  // TODO(zalman|abadams): Are nop crops common enough to special
1289  // case the device part of the if to do nothing?
1290  if (buf.device_interface != nullptr) {
1291  *this = cropped(d, min, extent);
1292  } else {
1293  crop_host(d, min, extent);
1294  }
1295  }
1296 
1297  /** Make an image that refers to a sub-rectangle of this image along
1298  * the first N dimensions. Asserts that the crop region is within
1299  * the existing bounds. The cropped image may drop any device handle
1300  * if the device_interface cannot accomplish the crop in-place. */
1301  Buffer<T, D> cropped(const std::vector<std::pair<int, int>> &rect) const {
1302  // Make a fresh copy of the underlying buffer (but not a fresh
1303  // copy of the allocation, if there is one).
1304  Buffer<T, D> im = *this;
1305 
1306  // This guarantees the prexisting device ref is dropped if the
1307  // device_crop call fails and maintains the buffer in a consistent
1308  // state.
1309  im.device_deallocate();
1310 
1311  im.crop_host(rect);
1312  if (buf.device_interface != nullptr) {
1313  complete_device_crop(im);
1314  }
1315  return im;
1316  }
1317 
1318  /** Crop an image in-place along the first N dimensions. This does
1319  * not move any data around in memory, nor does it free memory. It
1320  * just rewrites the min/extent of each dimension to refer to a
1321  * subregion of the same allocation. */
1322  void crop(const std::vector<std::pair<int, int>> &rect) {
1323  // An optimization for non-device buffers. For the device case,
1324  // a temp buffer is required, so reuse the not-in-place version.
1325  // TODO(zalman|abadams): Are nop crops common enough to special
1326  // case the device part of the if to do nothing?
1327  if (buf.device_interface != nullptr) {
1328  *this = cropped(rect);
1329  } else {
1330  crop_host(rect);
1331  }
1332  }
1333 
1334  /** Make an image which refers to the same data with using
1335  * translated coordinates in the given dimension. Positive values
1336  * move the image data to the right or down relative to the
1337  * coordinate system. Drops any device handle. */
1338  Buffer<T, D> translated(int d, int dx) const {
1339  Buffer<T, D> im = *this;
1340  im.translate(d, dx);
1341  return im;
1342  }
1343 
    /** Translate an image in-place along one dimension by changing
     * how it is indexed. Does not move any data around in memory. */
    void translate(int d, int delta) {
        assert(d >= 0 && d < this->dimensions());
        // Shifting the min re-addresses the same storage.
        buf.dim[d].min += delta;
    }
1351 
1352  /** Make an image which refers to the same data translated along
1353  * the first N dimensions. */
1354  Buffer<T, D> translated(const std::vector<int> &delta) const {
1355  Buffer<T, D> im = *this;
1356  im.translate(delta);
1357  return im;
1358  }
1359 
    /** Translate an image along the first N dimensions by changing
     * how it is indexed. Does not move any data around in memory. */
    void translate(const std::vector<int> &delta) {
        // Guard the narrowing conversion below.
        assert(delta.size() <= static_cast<decltype(delta.size())>(std::numeric_limits<int>::max()));
        int limit = (int)delta.size();
        assert(limit <= dimensions());
        for (int i = 0; i < limit; i++) {
            translate(i, delta[i]);
        }
    }

    /** Set the min coordinate of an image in the first N dimensions. */
    // @{
    void set_min(const std::vector<int> &mins) {
        assert(mins.size() <= static_cast<decltype(mins.size())>(dimensions()));
        for (size_t i = 0; i < mins.size(); i++) {
            buf.dim[i].min = mins[i];
        }
    }

    // Variadic convenience overload; forwards to the vector version.
    template<typename... Args>
    void set_min(Args... args) {
        set_min(std::vector<int>{args...});
    }
    // @}
1387 
1388  /** Test if a given coordinate is within the bounds of an image. */
1389  // @{
1390  bool contains(const std::vector<int> &coords) const {
1391  assert(coords.size() <= static_cast<decltype(coords.size())>(dimensions()));
1392  for (size_t i = 0; i < coords.size(); i++) {
1393  if (coords[i] < dim((int)i).min() || coords[i] > dim((int)i).max()) {
1394  return false;
1395  }
1396  }
1397  return true;
1398  }
1399 
1400  template<typename... Args>
1401  bool contains(Args... args) const {
1402  return contains(std::vector<int>{args...});
1403  }
1404  // @}
1405 
1406  /** Make a buffer which refers to the same data in the same layout
1407  * using a swapped indexing order for the dimensions given. So
1408  * A = B.transposed(0, 1) means that A(i, j) == B(j, i), and more
1409  * strongly that A.address_of(i, j) == B.address_of(j, i). */
1410  Buffer<T, D> transposed(int d1, int d2) const {
1411  Buffer<T, D> im = *this;
1412  im.transpose(d1, d2);
1413  return im;
1414  }
1415 
    /** Transpose a buffer in-place by changing how it is indexed. For
     * example, transpose(0, 1) on a two-dimensional buffer means that
     * the value referred to by coordinates (i, j) is now reached at
     * the coordinates (j, i), and vice versa. This is done by
     * reordering the per-dimension metadata rather than by moving
     * data around in memory, so other views of the same memory will
     * not see the data as having been transposed. */
    void transpose(int d1, int d2) {
        assert(d1 >= 0 && d1 < this->dimensions());
        assert(d2 >= 0 && d2 < this->dimensions());
        std::swap(buf.dim[d1], buf.dim[d2]);
    }

    /** A generalized transpose: instead of swapping two dimensions,
     * pass a vector that lists each dimension index exactly once, in
     * the desired order. This does not move any data around in memory
     * - it just permutes how it is indexed. */
    void transpose(const std::vector<int> &order) {
        assert((int)order.size() == dimensions());
        if (dimensions() < 2) {
            // My, that was easy
            return;
        }

        // Insertion-sort a copy of the permutation; each adjacent swap
        // performed on the sort is mirrored as a pairwise transpose of
        // the dimensions, so when the copy is sorted the buffer is in
        // the requested order.
        std::vector<int> order_sorted = order;
        for (size_t i = 1; i < order_sorted.size(); i++) {
            for (size_t j = i; j > 0 && order_sorted[j - 1] > order_sorted[j]; j--) {
                std::swap(order_sorted[j], order_sorted[j - 1]);
                transpose(j, j - 1);
            }
        }
    }
1448 
1449  /** Make a buffer which refers to the same data in the same
1450  * layout using a different ordering of the dimensions. */
1451  Buffer<T, D> transposed(const std::vector<int> &order) const {
1452  Buffer<T, D> im = *this;
1453  im.transpose(order);
1454  return im;
1455  }
1456 
1457  /** Make a lower-dimensional buffer that refers to one slice of
1458  * this buffer. */
1459  Buffer<T, D> sliced(int d, int pos) const {
1460  Buffer<T, D> im = *this;
1461 
1462  // This guarantees the prexisting device ref is dropped if the
1463  // device_slice call fails and maintains the buffer in a consistent
1464  // state.
1465  im.device_deallocate();
1466 
1467  im.slice_host(d, pos);
1468  if (buf.device_interface != nullptr) {
1469  complete_device_slice(im, d, pos);
1470  }
1471  return im;
1472  }
1473 
    /** Make a lower-dimensional buffer that refers to one slice of this
     * buffer at the dimension's minimum. */
    inline Buffer<T, D> sliced(int d) const {
        // Slice at the low edge of dimension d.
        return sliced(d, dim(d).min());
    }
1479 
1480  /** Rewrite the buffer to refer to a single lower-dimensional
1481  * slice of itself along the given dimension at the given
1482  * coordinate. Does not move any data around or free the original
1483  * memory, so other views of the same data are unaffected. */
1484  void slice(int d, int pos) {
1485  // An optimization for non-device buffers. For the device case,
1486  // a temp buffer is required, so reuse the not-in-place version.
1487  // TODO(zalman|abadams): Are nop slices common enough to special
1488  // case the device part of the if to do nothing?
1489  if (buf.device_interface != nullptr) {
1490  *this = sliced(d, pos);
1491  } else {
1492  slice_host(d, pos);
1493  }
1494  }
1495 
    /** Slice a buffer in-place at the dimension's minimum. */
    inline void slice(int d) {
        // Slice at the low edge of dimension d.
        slice(d, dim(d).min());
    }
1500 
1501  /** Make a new buffer that views this buffer as a single slice in a
1502  * higher-dimensional space. The new dimension has extent one and
1503  * the given min. This operation is the opposite of slice. As an
1504  * example, the following condition is true:
1505  *
1506  \code
1507  im2 = im.embedded(1, 17);
1508  &im(x, y, c) == &im2(x, 17, y, c);
1509  \endcode
1510  */
1511  Buffer<T, D> embedded(int d, int pos = 0) const {
1512  Buffer<T, D> im(*this);
1513  im.embed(d, pos);
1514  return im;
1515  }
1516 
1517  /** Embed a buffer in-place, increasing the
1518  * dimensionality. */
1519  void embed(int d, int pos = 0) {
1520  assert(d >= 0 && d <= dimensions());
1521  add_dimension();
1522  translate(dimensions() - 1, pos);
1523  for (int i = dimensions() - 1; i > d; i--) {
1524  transpose(i, i - 1);
1525  }
1526  }
1527 
    /** Add a new dimension with a min of zero and an extent of
     * one. The stride is the extent of the outermost dimension times
     * its stride. The new dimension is the last dimension. This is a
     * special case of embed. */
    void add_dimension() {
        const int dims = buf.dimensions;
        buf.dimensions++;
        if (buf.dim != shape) {
            // We're already on the heap. Reallocate.
            halide_dimension_t *new_shape = new halide_dimension_t[buf.dimensions];
            for (int i = 0; i < dims; i++) {
                new_shape[i] = buf.dim[i];
            }
            delete[] buf.dim;
            buf.dim = new_shape;
        } else if (dims == D) {
            // Transition from the in-class storage to the heap
            make_shape_storage(buf.dimensions);
            for (int i = 0; i < dims; i++) {
                buf.dim[i] = shape[i];
            }
        } else {
            // We still fit in the class
        }
        // New dimension: min 0, extent 1, stride filled in below.
        buf.dim[dims] = {0, 1, 0};
        if (dims == 0) {
            buf.dim[dims].stride = 1;
        } else {
            // Dense continuation of the previously-outermost dimension.
            buf.dim[dims].stride = buf.dim[dims - 1].extent * buf.dim[dims - 1].stride;
        }
    }

    /** Add a new dimension with a min of zero, an extent of one, and
     * the specified stride. The new dimension is the last
     * dimension. This is a special case of embed. */
        add_dimension();
        buf.dim[buf.dimensions - 1].stride = s;
    }
1567 
    /** Methods for managing any GPU allocation. */
    // @{
    // Set the host dirty flag. Called by every operator()
    // access. Must be inlined so it can be hoisted out of loops.
    void set_host_dirty(bool v = true) {
        assert((!v || !device_dirty()) && "Cannot set host dirty when device is already dirty. Call copy_to_host() before accessing the buffer from host.");
        buf.set_host_dirty(v);
    }

    // Check if the device allocation is dirty. Called by
    // set_host_dirty, which is called by every accessor. Must be
    // inlined so it can be hoisted out of loops.
    bool device_dirty() const {
        return buf.device_dirty();
    }

    // Check if the host allocation is dirty.
    bool host_dirty() const {
        return buf.host_dirty();
    }

    // Set the device dirty flag; the host must not also be dirty.
    void set_device_dirty(bool v = true) {
        assert((!v || !host_dirty()) && "Cannot set device dirty when host is already dirty.");
        buf.set_device_dirty(v);
    }
1594 
1595  int copy_to_host(void *ctx = nullptr) {
1596  if (device_dirty()) {
1597  return buf.device_interface->copy_to_host(ctx, &buf);
1598  }
1599  return 0;
1600  }
1601 
1602  int copy_to_device(const struct halide_device_interface_t *device_interface, void *ctx = nullptr) {
1603  if (host_dirty()) {
1604  return device_interface->copy_to_device(ctx, &buf, device_interface);
1605  }
1606  return 0;
1607  }
1608 
    // Allocate a device-side allocation for this buffer using the
    // given device interface.
    int device_malloc(const struct halide_device_interface_t *device_interface, void *ctx = nullptr) {
        return device_interface->device_malloc(ctx, &buf, device_interface);
    }

    // Free the device allocation. Only legal on a uniquely-owned,
    // device_malloc-style allocation (asserted below).
    int device_free(void *ctx = nullptr) {
        if (dev_ref_count) {
            assert(dev_ref_count->ownership == BufferDeviceOwnership::Allocated &&
                   "Can't call device_free on an unmanaged or wrapped native device handle. "
                   "Free the source allocation or call device_detach_native instead.");
            // Multiple people may be holding onto this dev field
            assert(dev_ref_count->count == 1 &&
                   "Multiple Halide::Runtime::Buffer objects share this device "
                   "allocation. Freeing it would create dangling references. "
                   "Don't call device_free on Halide buffers that you have copied or "
                   "passed by value.");
        }
        int ret = 0;
        if (buf.device_interface) {
            ret = buf.device_interface->device_free(ctx, &buf);
        }
        if (dev_ref_count) {
            delete dev_ref_count;
            dev_ref_count = nullptr;
        }
        return ret;
    }

    // Wrap an externally-managed native device handle in this buffer.
    int device_wrap_native(const struct halide_device_interface_t *device_interface,
                           uint64_t handle, void *ctx = nullptr) {
        assert(device_interface);
        dev_ref_count = new DeviceRefCount;
        return device_interface->wrap_native(ctx, &buf, handle, device_interface);
    }

    // Release a wrapped native handle without freeing it; the inverse
    // of device_wrap_native.
    int device_detach_native(void *ctx = nullptr) {
        assert(dev_ref_count &&
               dev_ref_count->ownership == BufferDeviceOwnership::WrappedNative &&
               "Only call device_detach_native on buffers wrapping a native "
               "device handle via device_wrap_native. This buffer was allocated "
               "using device_malloc, or is unmanaged. "
               "Call device_free or free the original allocation instead.");
        // Multiple people may be holding onto this dev field
        assert(dev_ref_count->count == 1 &&
               "Multiple Halide::Runtime::Buffer objects share this device "
               "allocation. Freeing it could create dangling references. "
               "Don't call device_detach_native on Halide buffers that you "
               "have copied or passed by value.");
        int ret = 0;
        if (buf.device_interface) {
            ret = buf.device_interface->detach_native(ctx, &buf);
        }
        delete dev_ref_count;
        dev_ref_count = nullptr;
        return ret;
    }

    // Allocate host and device storage together via the device interface.
    int device_and_host_malloc(const struct halide_device_interface_t *device_interface, void *ctx = nullptr) {
        return device_interface->device_and_host_malloc(ctx, &buf, device_interface);
    }

    // Free storage created by device_and_host_malloc (asserted below).
    int device_and_host_free(const struct halide_device_interface_t *device_interface, void *ctx = nullptr) {
        if (dev_ref_count) {
            assert(dev_ref_count->ownership == BufferDeviceOwnership::AllocatedDeviceAndHost &&
                   "Can't call device_and_host_free on a device handle not allocated with device_and_host_malloc. "
                   "Free the source allocation or call device_detach_native instead.");
            // Multiple people may be holding onto this dev field
            assert(dev_ref_count->count == 1 &&
                   "Multiple Halide::Runtime::Buffer objects share this device "
                   "allocation. Freeing it would create dangling references. "
                   "Don't call device_and_host_free on Halide buffers that you have copied or "
                   "passed by value.");
        }
        int ret = 0;
        if (buf.device_interface) {
            ret = buf.device_interface->device_and_host_free(ctx, &buf);
        }
        if (dev_ref_count) {
            delete dev_ref_count;
            dev_ref_count = nullptr;
        }
        return ret;
    }
1692 
1693  int device_sync(void *ctx = nullptr) {
1694  if (buf.device_interface) {
1695  return buf.device_interface->device_sync(ctx, &buf);
1696  } else {
1697  return 0;
1698  }
1699  }
1700 
    // True if this buffer currently carries a device allocation handle.
    bool has_device_allocation() const {
        return buf.device != 0;
    }

    /** Return the method by which the device field is managed
     * (see BufferDeviceOwnership). */
        if (dev_ref_count == nullptr) {
        }
        return dev_ref_count->ownership;
    }
    // @}
1713 
    /** If you use the (x, y, c) indexing convention, then Halide
     * Buffers are stored planar by default. This function constructs
     * an interleaved RGB or RGBA image that can still be indexed
     * using (x, y, c). Passing it to a generator requires that the
     * generator has been compiled with support for interleaved (also
     * known as packed or chunky) memory layouts. */
        // Note that this is equivalent to calling transpose({2, 0, 1}),
        // but slightly more efficient.
        im.transpose(0, 1);
        im.transpose(1, 2);
        return im;
    }

    /** If you use the (x, y, c) indexing convention, then Halide
     * Buffers are stored planar by default. This function constructs
     * an interleaved RGB or RGBA image that can still be indexed
     * using (x, y, c). Passing it to a generator requires that the
     * generator has been compiled with support for interleaved (also
     * known as packed or chunky) memory layouts. */
    }

    /** Wrap an existing interleaved image. */
        // Same dimension shuffle as make_interleaved above.
        im.transpose(0, 1);
        im.transpose(1, 2);
        return im;
    }

    /** Wrap an existing interleaved image. */
    }

    /** Make a zero-dimensional Buffer */
        // Slice away the single placeholder dimension.
        buf.slice(0, 0);
        return buf;
    }

    /** Make a zero-dimensional Buffer */
        Buffer<T, 1> buf(1);
        buf.slice(0, 0);
        return buf;
    }

    /** Make a zero-dimensional Buffer that points to non-owned, existing data */
        Buffer<T, 1> buf(data, 1);
        buf.slice(0, 0);
        return buf;
    }

    /** Make a buffer with the same shape and memory nesting order as
     * another buffer. It may have a different type. */
    template<typename T2, int D2>
                                           void *(*allocate_fn)(size_t) = nullptr,
                                           void (*deallocate_fn)(void *) = nullptr) {

        // Use the source's dynamic type when T is void, otherwise our static type.
        const halide_type_t dst_type = T_is_void ? src.type() : halide_type_of<typename std::remove_cv<not_void_T>::type>();
        return Buffer<>::make_with_shape_of_helper(dst_type, src.dimensions(), src.buf.dim,
                                                   allocate_fn, deallocate_fn);
    }
1785 
1786 private:
    // Allocate a fresh dense buffer of the given type whose dimensions
    // nest in the same order as 'shape'. 'shape' is scratch storage and
    // is restored before returning.
    static Buffer<> make_with_shape_of_helper(halide_type_t dst_type,
                                              int dimensions,
                                              halide_dimension_t *shape,
                                              void *(*allocate_fn)(size_t),
                                              void (*deallocate_fn)(void *)) {
        // Reorder the dimensions of src to have strides in increasing order
        std::vector<int> swaps;
        for (int i = dimensions - 1; i > 0; i--) {
            for (int j = i; j > 0; j--) {
                if (shape[j - 1].stride > shape[j].stride) {
                    std::swap(shape[j - 1], shape[j]);
                    // Record each swap so it can be undone below.
                    swaps.push_back(j);
                }
            }
        }

        // Rewrite the strides to be dense (this messes up src, which
        // is why we took it by value).
        for (int i = 0; i < dimensions; i++) {
            if (i == 0) {
                shape[i].stride = 1;
            } else {
                shape[i].stride = shape[i - 1].extent * shape[i - 1].stride;
            }
        }

        // Undo the dimension reordering
        while (!swaps.empty()) {
            int j = swaps.back();
            std::swap(shape[j - 1], shape[j]);
            swaps.pop_back();
        }

        // Use an explicit runtime type, and make dst a Buffer<void>, to allow
        // using this method with Buffer<void> for either src or dst.
        Buffer<> dst(dst_type, nullptr, dimensions, shape);
        dst.allocate(allocate_fn, deallocate_fn);

        return dst;
    }
1827 
    // Linear element offset of a variadic coordinate tuple, relative to
    // the buffer's min corner; recursion peels one coordinate (dimension
    // d) per step.
    template<typename... Args>
    ptrdiff_t
    offset_of(int d, int first, Args... rest) const {
        // This dimension's contribution plus that of the remaining args.
        return offset_of(d + 1, rest...) + this->buf.dim[d].stride * (first - this->buf.dim[d].min);
    }
1834 
    // Base case of the variadic recursion: no coordinates left.
    ptrdiff_t offset_of(int d) const {
        return 0;
    }
1839 
1840  template<typename... Args>
1842  storage_T *
1843  address_of(Args... args) const {
1844  if (T_is_void) {
1845  return (storage_T *)(this->buf.host) + offset_of(0, args...) * type().bytes();
1846  } else {
1847  return (storage_T *)(this->buf.host) + offset_of(0, args...);
1848  }
1849  }
1850 
1852  ptrdiff_t offset_of(const int *pos) const {
1853  ptrdiff_t offset = 0;
1854  for (int i = this->dimensions() - 1; i >= 0; i--) {
1855  offset += this->buf.dim[i].stride * (pos[i] - this->buf.dim[i].min);
1856  }
1857  return offset;
1858  }
1859 
1861  storage_T *address_of(const int *pos) const {
1862  if (T_is_void) {
1863  return (storage_T *)this->buf.host + offset_of(pos) * type().bytes();
1864  } else {
1865  return (storage_T *)this->buf.host + offset_of(pos);
1866  }
1867  }
1868 
1869 public:
1870  /** Get a pointer to the address of the min coordinate. */
1871  T *data() const {
1872  return (T *)(this->buf.host);
1873  }
1874 
1875  /** Access elements. Use im(...) to get a reference to an element,
1876  * and use &im(...) to get the address of an element. If you pass
1877  * fewer arguments than the buffer has dimensions, the rest are
1878  * treated as their min coordinate. The non-const versions set the
1879  * host_dirty flag to true.
1880  */
1881  //@{
1882  template<typename... Args,
1883  typename = typename std::enable_if<AllInts<Args...>::value>::type>
1884  HALIDE_ALWAYS_INLINE const not_void_T &operator()(int first, Args... rest) const {
1885  static_assert(!T_is_void,
1886  "Cannot use operator() on Buffer<void> types");
1887  assert(!device_dirty());
1888  return *((const not_void_T *)(address_of(first, rest...)));
1889  }
1890 
    // Zero-argument accessor: returns the element at the min coordinate
    // of every dimension (i.e. what data() points at).
    const not_void_T &
    operator()() const {
        static_assert(!T_is_void,
                      "Cannot use operator() on Buffer<void> types");
        assert(!device_dirty());
        return *((const not_void_T *)(data()));
    }
1899 
    // Read accessor taking a coordinate array (one entry per dimension).
    const not_void_T &
    operator()(const int *pos) const {
        static_assert(!T_is_void,
                      "Cannot use operator() on Buffer<void> types");
        assert(!device_dirty());
        return *((const not_void_T *)(address_of(pos)));
    }
1908 
1909  template<typename... Args,
1910  typename = typename std::enable_if<AllInts<Args...>::value>::type>
1912  not_void_T &
1913  operator()(int first, Args... rest) {
1914  static_assert(!T_is_void,
1915  "Cannot use operator() on Buffer<void> types");
1916  set_host_dirty();
1917  return *((not_void_T *)(address_of(first, rest...)));
1918  }
1919 
1921  not_void_T &
1923  static_assert(!T_is_void,
1924  "Cannot use operator() on Buffer<void> types");
1925  set_host_dirty();
1926  return *((not_void_T *)(data()));
1927  }
1928 
    // Mutable accessor taking a coordinate array; marks the host copy dirty.
    not_void_T &
    operator()(const int *pos) {
        static_assert(!T_is_void,
                      "Cannot use operator() on Buffer<void> types");
        set_host_dirty();
        return *((not_void_T *)(address_of(pos)));
    }
1937  // @}
1938 
1939  /** Tests that all values in this buffer are equal to val. */
1940  bool all_equal(not_void_T val) const {
1941  bool all_equal = true;
1942  for_each_element([&](const int *pos) { all_equal &= (*this)(pos) == val; });
1943  return all_equal;
1944  }
1945 
1946  Buffer<T, D> &fill(not_void_T val) {
1947  set_host_dirty();
1948  for_each_value([=](T &v) { v = val; });
1949  return *this;
1950  }
1951 
1952 private:
1953  /** Helper functions for for_each_value. */
1954  // @{
    // Per-dimension traversal state for for_each_value: the dimension's
    // extent, plus one stride per buffer being visited (N buffers).
    template<int N>
    struct for_each_value_task_dim {
        int extent;
        int stride[N];
    };
1960 
    // Given an array of strides, and a bunch of pointers to pointers
    // (all of different types), advance the pointers using the
    // strides. One stride per pointer; the recursion peels one pair
    // per step.
    template<typename Ptr, typename... Ptrs>
    HALIDE_ALWAYS_INLINE static void advance_ptrs(const int *stride, Ptr *ptr, Ptrs... ptrs) {
        (*ptr) += *stride;
        advance_ptrs(stride + 1, ptrs...);
    }
1969 
    // Base case: no pointers left to advance.
    static void advance_ptrs(const int *) {
    }
1973 
    // Same as the above, but just increments the pointers (the dense,
    // stride-one fast path of for_each_value_helper).
    template<typename Ptr, typename... Ptrs>
    HALIDE_ALWAYS_INLINE static void increment_ptrs(Ptr *ptr, Ptrs... ptrs) {
        (*ptr)++;
        increment_ptrs(ptrs...);
    }
1980 
    // Base case: no pointers left to increment.
    static void increment_ptrs() {
    }
1984 
    // Recursively walk the (stride-sorted, flattened) dimensions in t,
    // applying f to the element tuple at each site. d counts down from
    // the outermost dimension; -1 means a zero-dimensional buffer (one
    // call). innermost_strides_are_one selects a cheaper pointer
    // increment for the innermost loop when every buffer is dense there.
    template<typename Fn, typename... Ptrs>
    HALIDE_NEVER_INLINE static void for_each_value_helper(Fn &&f, int d, bool innermost_strides_are_one,
                                                          const for_each_value_task_dim<sizeof...(Ptrs)> *t, Ptrs... ptrs) {
        if (d == -1) {
            // Scalar buffer: exactly one element per buffer.
            f((*ptrs)...);
        } else if (d == 0) {
            if (innermost_strides_are_one) {
                // Dense innermost loop: plain increments, friendlier to
                // autovectorization than the generic stride advance.
                for (int i = t[0].extent; i != 0; i--) {
                    f((*ptrs)...);
                    increment_ptrs((&ptrs)...);
                }
            } else {
                for (int i = t[0].extent; i != 0; i--) {
                    f((*ptrs)...);
                    advance_ptrs(t[0].stride, (&ptrs)...);
                }
            }
        } else {
            // Outer dimensions: recurse into d-1, then advance every
            // pointer by this dimension's per-buffer stride.
            for (int i = t[d].extent; i != 0; i--) {
                for_each_value_helper(f, d - 1, innermost_strides_are_one, t, ptrs...);
                advance_ptrs(t[d].stride, (&ptrs)...);
            }
        }
    }
2009 
    // Shared setup for for_each_value: validates the buffers, fills the
    // task-dim array t (extent plus per-buffer strides per dimension),
    // sorts dimensions by the first buffer's stride, and flattens
    // adjacent dimensions that together span dense memory. Returns true
    // if, after all that, every buffer has stride 1 in the innermost
    // dimension.
    template<int N>
    HALIDE_NEVER_INLINE static bool for_each_value_prep(for_each_value_task_dim<N> *t,
                                                        const halide_buffer_t **buffers) {
        // Check the buffers all have clean host allocations
        for (int i = 0; i < N; i++) {
            if (buffers[i]->device) {
                assert(buffers[i]->host &&
                       "Buffer passed to for_each_value has device allocation but no host allocation. Call allocate() and copy_to_host() first");
                assert(!buffers[i]->device_dirty() &&
                       "Buffer passed to for_each_value is dirty on device. Call copy_to_host() first");
            } else {
                assert(buffers[i]->host &&
                       "Buffer passed to for_each_value has no host or device allocation");
            }
        }

        const int dimensions = buffers[0]->dimensions;

        // Extract the strides in all the dimensions
        for (int i = 0; i < dimensions; i++) {
            for (int j = 0; j < N; j++) {
                // All buffers must agree on dimensionality and shape
                // (extent and min per dimension).
                assert(buffers[j]->dimensions == dimensions);
                assert(buffers[j]->dim[i].extent == buffers[0]->dim[i].extent &&
                       buffers[j]->dim[i].min == buffers[0]->dim[i].min);
                const int s = buffers[j]->dim[i].stride;
                t[i].stride[j] = s;
            }
            t[i].extent = buffers[0]->dim[i].extent;

            // Order the dimensions by stride, so that the traversal is cache-coherent.
            // (Insertion sort keyed on the first buffer's stride.)
            for (int j = i; j > 0 && t[j].stride[0] < t[j - 1].stride[0]; j--) {
                std::swap(t[j], t[j - 1]);
            }
        }

        // flatten dimensions where possible to make a larger inner
        // loop for autovectorization. Dimension i can be merged into
        // i-1 when, in every buffer, it continues i-1's memory densely.
        // (The caller allocates dimensions + 1 entries for t, so the
        // t[j + 1] read at j == d - 1 stays in bounds.)
        int d = dimensions;
        for (int i = 1; i < d; i++) {
            bool flat = true;
            for (int j = 0; j < N; j++) {
                flat = flat && t[i - 1].stride[j] * t[i - 1].extent == t[i].stride[j];
            }
            if (flat) {
                t[i - 1].extent *= t[i].extent;
                for (int j = i; j < d; j++) {
                    t[j] = t[j + 1];
                }
                i--;
                d--;
                t[d].extent = 1;
            }
        }

        bool innermost_strides_are_one = true;
        if (dimensions > 0) {
            for (int i = 0; i < N; i++) {
                innermost_strides_are_one &= (t[0].stride[i] == 1);
            }
        }

        return innermost_strides_are_one;
    }
2073 
    // Stack-allocates the task-dim array (dimensions() + 1 entries; the
    // flattening step in prep touches one past the used count), then
    // runs prep and the recursive traversal over this buffer and the
    // other_buffers in lockstep.
    template<typename Fn, typename... Args, int N = sizeof...(Args) + 1>
    void for_each_value_impl(Fn &&f, Args &&... other_buffers) const {
        Buffer<>::for_each_value_task_dim<N> *t =
            (Buffer<>::for_each_value_task_dim<N> *)HALIDE_ALLOCA((dimensions() + 1) * sizeof(for_each_value_task_dim<N>));
        // Move the preparatory code into a non-templated helper to
        // save code size.
        const halide_buffer_t *buffers[] = {&buf, (&other_buffers.buf)...};
        bool innermost_strides_are_one = Buffer<>::for_each_value_prep(t, buffers);

        Buffer<>::for_each_value_helper(f, dimensions() - 1,
                                        innermost_strides_are_one,
                                        t,
                                        data(), (other_buffers.data())...);
    }
2088  // @}
2089 
2090 public:
2091  /** Call a function on every value in the buffer, and the
2092  * corresponding values in some number of other buffers of the
2093  * same size. The function should take a reference, const
2094  * reference, or value of the correct type for each buffer. This
2095  * effectively lifts a function of scalars to an element-wise
2096  * function of buffers. This produces code that the compiler can
2097  * autovectorize. This is slightly cheaper than for_each_element,
2098  * because it does not need to track the coordinates.
2099  *
2100  * Note that constness of Buffers is preserved: a const Buffer<T> (for either
2101  * 'this' or the other-buffers arguments) will allow mutation of the
2102  * buffer contents, while a Buffer<const T> will not. Attempting to specify
2103  * a mutable reference for the lambda argument of a Buffer<const T>
2104  * will result in a compilation error. */
2105  // @{
    // Const overload. Per the class comment above, a const Buffer<T>
    // still allows mutation of contents (only Buffer<const T> forbids
    // it). Returns *this for chaining.
    template<typename Fn, typename... Args, int N = sizeof...(Args) + 1>
    HALIDE_ALWAYS_INLINE const Buffer<T, D> &for_each_value(Fn &&f, Args &&... other_buffers) const {
        for_each_value_impl(f, std::forward<Args>(other_buffers)...);
        return *this;
    }
2111 
    // Non-const overload; identical behavior, returns a mutable *this.
    template<typename Fn, typename... Args, int N = sizeof...(Args) + 1>
    Buffer<T, D> &
    for_each_value(Fn &&f, Args &&... other_buffers) {
        for_each_value_impl(f, std::forward<Args>(other_buffers)...);
        return *this;
    }
2119  // @}
2120 
2121 private:
    // Helper functions for for_each_element
    // Inclusive coordinate bounds [min, max] for one dimension.
    struct for_each_element_task_dim {
        int min, max;
    };
2126 
    /** If f is callable with this many args, call it. The first
     * argument is just to make the overloads distinct (a literal 0
     * prefers this int overload over the double one below). Actual
     * overload selection is done using the enable_if. */
    template<typename Fn,
             typename... Args,
             typename = decltype(std::declval<Fn>()(std::declval<Args>()...))>
    HALIDE_ALWAYS_INLINE static void for_each_element_variadic(int, int, const for_each_element_task_dim *, Fn &&f, Args... args) {
        f(args...);
    }
2136 
    /** If the above overload is impossible, we add an outer loop over
     * an additional argument and try again. */
    template<typename Fn,
             typename... Args>
    HALIDE_ALWAYS_INLINE static void for_each_element_variadic(double, int d, const for_each_element_task_dim *t, Fn &&f, Args... args) {
        // Iterate dimension d's coordinate range, prepending each
        // coordinate to the argument list for the recursive attempt.
        for (int i = t[d].min; i <= t[d].max; i++) {
            for_each_element_variadic(0, d - 1, t, std::forward<Fn>(f), i, args...);
        }
    }
2146 
    /** Determine the minimum number of arguments a callable can take
     * using the same trick. */
    template<typename Fn,
             typename... Args,
             typename = decltype(std::declval<Fn>()(std::declval<Args>()...))>
    HALIDE_ALWAYS_INLINE static int num_args(int, Fn &&, Args...) {
        // f is callable with the current pack: report its size.
        return (int)(sizeof...(Args));
    }
2155 
    /** The recursive version is only enabled up to a recursion limit
     * of 256. This catches callables that aren't callable with any
     * number of ints. */
    template<typename Fn,
             typename... Args>
    HALIDE_ALWAYS_INLINE static int num_args(double, Fn &&f, Args... args) {
        static_assert(sizeof...(args) <= 256,
                      "Callable passed to for_each_element must accept either a const int *,"
                      " or up to 256 ints. No such operator found. Expect infinite template recursion.");
        // Try again with one more int argument.
        return num_args(0, std::forward<Fn>(f), 0, args...);
    }
2167 
    /** A version where the callable takes a position array instead,
     * with compile-time recursion on the dimensionality. This
     * overload is preferred to the one below using the same int vs
     * double trick as above, but is impossible once d hits -1 using
     * std::enable_if. */
    template<int d,
             typename Fn,
             typename = typename std::enable_if<(d >= 0)>::type>
    HALIDE_ALWAYS_INLINE static void for_each_element_array_helper(int, const for_each_element_task_dim *t, Fn &&f, int *pos) {
        // Sweep this dimension's coordinate in pos, recursing inward.
        for (pos[d] = t[d].min; pos[d] <= t[d].max; pos[d]++) {
            for_each_element_array_helper<d - 1>(0, t, std::forward<Fn>(f), pos);
        }
    }
2181 
    /** Base case for recursion above: pos is fully populated, so
     * invoke the callable. */
    template<int d,
             typename Fn,
             typename = typename std::enable_if<(d < 0)>::type>
    HALIDE_ALWAYS_INLINE static void for_each_element_array_helper(double, const for_each_element_task_dim *t, Fn &&f, int *pos) {
        f(pos);
    }
2189 
2190  /** A run-time-recursive version (instead of
2191  * compile-time-recursive) that requires the callable to take a
2192  * pointer to a position array instead. Dispatches to the
2193  * compile-time-recursive version once the dimensionality gets
2194  * small. */
2195  template<typename Fn>
2196  static void for_each_element_array(int d, const for_each_element_task_dim *t, Fn &&f, int *pos) {
2197  if (d == -1) {
2198  f(pos);
2199  } else if (d == 0) {
2200  // Once the dimensionality gets small enough, dispatch to
2201  // a compile-time-recursive version for better codegen of
2202  // the inner loops.
2203  for_each_element_array_helper<0, Fn>(0, t, std::forward<Fn>(f), pos);
2204  } else if (d == 1) {
2205  for_each_element_array_helper<1, Fn>(0, t, std::forward<Fn>(f), pos);
2206  } else if (d == 2) {
2207  for_each_element_array_helper<2, Fn>(0, t, std::forward<Fn>(f), pos);
2208  } else if (d == 3) {
2209  for_each_element_array_helper<3, Fn>(0, t, std::forward<Fn>(f), pos);
2210  } else {
2211  for (pos[d] = t[d].min; pos[d] <= t[d].max; pos[d]++) {
2212  for_each_element_array(d - 1, t, std::forward<Fn>(f), pos);
2213  }
2214  }
2215  }
2216 
    /** We now have two overloads for for_each_element. This one
     * triggers if the callable takes a const int *.
     */
    template<typename Fn,
             typename = decltype(std::declval<Fn>()((const int *)nullptr))>
    static void for_each_element(int, int dims, const for_each_element_task_dim *t, Fn &&f, int check = 0) {
        // NOTE(review): 'check' is unused in the body; presumably it only
        // shapes the signature for overload selection — confirm before
        // removing. Scratch coordinate array lives on the stack.
        int *pos = (int *)HALIDE_ALLOCA(dims * sizeof(int));
        for_each_element_array(dims - 1, t, std::forward<Fn>(f), pos);
    }
2226 
    /** This one triggers otherwise. It treats the callable as
     * something that takes some number of ints. */
    template<typename Fn>
    HALIDE_ALWAYS_INLINE static void for_each_element(double, int dims, const for_each_element_task_dim *t, Fn &&f) {
        // Iterate only over as many dimensions as f has int parameters.
        int args = num_args(0, std::forward<Fn>(f));
        assert(dims >= args);
        for_each_element_variadic(0, args - 1, t, std::forward<Fn>(f));
    }
2235 
    // Gather each dimension's inclusive [min, max] bounds into a
    // stack-allocated array, then dispatch to whichever for_each_element
    // overload matches the callable (const int * vs. variadic ints).
    template<typename Fn>
    void for_each_element_impl(Fn &&f) const {
        for_each_element_task_dim *t =
            (for_each_element_task_dim *)HALIDE_ALLOCA(dimensions() * sizeof(for_each_element_task_dim));
        for (int i = 0; i < dimensions(); i++) {
            t[i].min = dim(i).min();
            t[i].max = dim(i).max();
        }
        for_each_element(0, dimensions(), t, std::forward<Fn>(f));
    }
2246 
2247 public:
2248  /** Call a function at each site in a buffer. This is likely to be
2249  * much slower than using Halide code to populate a buffer, but is
2250  * convenient for tests. If the function has more arguments than the
2251  * buffer has dimensions, the remaining arguments will be zero. If it
2252  * has fewer arguments than the buffer has dimensions then the last
2253  * few dimensions of the buffer are not iterated over. For example,
2254  * the following code exploits this to set a floating point RGB image
2255  * to red:
2256 
2257  \code
2258  Buffer<float, 3> im(100, 100, 3);
2259  im.for_each_element([&](int x, int y) {
2260  im(x, y, 0) = 1.0f;
2261  im(x, y, 1) = 0.0f;
2262  im(x, y, 2) = 0.0f;
2263  });
2264  \endcode
2265 
2266  * The compiled code is equivalent to writing the a nested for loop,
2267  * and compilers are capable of optimizing it in the same way.
2268  *
2269  * If the callable can be called with an int * as the sole argument,
2270  * that version is called instead. Each location in the buffer is
2271  * passed to it in a coordinate array. This version is higher-overhead
2272  * than the variadic version, but is useful for writing generic code
2273  * that accepts buffers of arbitrary dimensionality. For example, the
2274  * following sets the value at all sites in an arbitrary-dimensional
2275  * buffer to their first coordinate:
2276 
2277  \code
2278  im.for_each_element([&](const int *pos) {im(pos) = pos[0];});
2279  \endcode
2280 
2281  * It is also possible to use for_each_element to iterate over entire
2282  * rows or columns by cropping the buffer to a single column or row
2283  * respectively and iterating over elements of the result. For example,
2284  * to set the diagonal of the image to 1 by iterating over the columns:
2285 
2286  \code
2287  Buffer<float, 3> im(100, 100, 3);
2288  im.sliced(1, 0).for_each_element([&](int x, int c) {
2289  im(x, x, c) = 1.0f;
2290  });
2291  \endcode
2292 
2293  * Or, assuming the memory layout is known to be dense per row, one can
2294  * memset each row of an image like so:
2295 
2296  \code
2297  Buffer<float, 3> im(100, 100, 3);
2298  im.sliced(0, 0).for_each_element([&](int y, int c) {
2299  memset(&im(0, y, c), 0, sizeof(float) * im.width());
2300  });
2301  \endcode
2302 
2303  */
2304  // @{
2305  template<typename Fn>
2307  for_each_element_impl(f);
2308  return *this;
2309  }
2310 
2311  template<typename Fn>
2313  Buffer<T, D> &
2315  for_each_element_impl(f);
2316  return *this;
2317  }
2318  // @}
2319 
2320 private:
    // Variadic adapter used by fill(Fn): evaluates f at a coordinate and
    // stores the result into the buffer at that same coordinate.
    template<typename Fn>
    struct FillHelper {
        Fn f;               // the value-producing callable
        Buffer<T, D> *buf;  // destination buffer (non-owning)

        // Enabled only for argument packs f itself accepts.
        template<typename... Args,
                 typename = decltype(std::declval<Fn>()(std::declval<Args>()...))>
        void operator()(Args... args) {
            (*buf)(args...) = f(args...);
        }

        FillHelper(Fn &&f, Buffer<T, D> *buf)
            : f(std::forward<Fn>(f)), buf(buf) {
        }
    };
2336 
2337 public:
2338  /** Fill a buffer by evaluating a callable at every site. The
2339  * callable should look much like a callable passed to
2340  * for_each_element, but it should return the value that should be
2341  * stored to the coordinate corresponding to the arguments. */
2342  template<typename Fn,
2343  typename = typename std::enable_if<!std::is_arithmetic<typename std::decay<Fn>::type>::value>::type>
2344  Buffer<T, D> &fill(Fn &&f) {
2345  // We'll go via for_each_element. We need a variadic wrapper lambda.
2346  FillHelper<Fn> wrapper(std::forward<Fn>(f), this);
2347  return for_each_element(wrapper);
2348  }
2349 
    /** Check whether an input buffer passed to an extern stage is
     * querying bounds. Compared to doing the host pointer check directly,
     * this both adds clarity to code and will facilitate moving to
     * another representation for bounds query arguments. */
    bool is_bounds_query() const {
        return buf.is_bounds_query();
    }
2357 
2358  /** Convenient check to verify that all of the interesting bytes in the Buffer
2359  * are initialized under MSAN. Note that by default, we use for_each_value() here so that
2360  * we skip any unused padding that isn't part of the Buffer; this isn't efficient,
2361  * but in MSAN mode, it doesn't matter. (Pass true for the flag to force check
2362  * the entire Buffer storage.) */
2363  void msan_check_mem_is_initialized(bool entire = false) const {
2364 #if defined(__has_feature)
2365 #if __has_feature(memory_sanitizer)
2366  if (entire) {
2367  __msan_check_mem_is_initialized(data(), size_in_bytes());
2368  } else {
2369  for_each_value([](T &v) { __msan_check_mem_is_initialized(&v, sizeof(T)); ; });
2370  }
2371 #endif
2372 #endif
2373  }
2374 };
2375 
2376 } // namespace Runtime
2377 } // namespace Halide
2378 
2379 #undef HALIDE_ALLOCA
2380 
2381 #endif // HALIDE_RUNTIME_BUFFER_H
Halide::Runtime::Buffer::dimensions
int dimensions() const
Get the dimensionality of the buffer.
Definition: HalideBuffer.h:525
Halide::Runtime::Buffer::raw_buffer
const halide_buffer_t * raw_buffer() const
Definition: HalideBuffer.h:1035
Halide::Runtime::Buffer::slice
void slice(int d, int pos)
Rewrite the buffer to refer to a single lower-dimensional slice of itself along the given dimension a...
Definition: HalideBuffer.h:1484
Halide::Runtime::Buffer::operator()
const HALIDE_ALWAYS_INLINE not_void_T & operator()(const int *pos) const
Definition: HalideBuffer.h:1902
halide_dimension_t
Definition: HalideRuntime.h:1365
Halide::Runtime::Buffer::device_and_host_malloc
int device_and_host_malloc(const struct halide_device_interface_t *device_interface, void *ctx=nullptr)
Definition: HalideBuffer.h:1666
Halide::Runtime::Buffer::device_detach_native
int device_detach_native(void *ctx=nullptr)
Definition: HalideBuffer.h:1644
Halide::Runtime::Buffer::Dimension::iterator
An iterator class, so that you can iterate over coordinates in a dimensions using a range-based for l...
Definition: HalideBuffer.h:467
Halide::Runtime::Buffer::size_in_bytes
size_t size_in_bytes() const
The total number of bytes spanned by the data in memory.
Definition: HalideBuffer.h:575
Halide::Runtime::Buffer::Buffer
Buffer(T *data, const std::vector< int > &sizes)
Initialize an Buffer from a pointer and a vector of sizes.
Definition: HalideBuffer.h:958
halide_buffer_t::dim
halide_dimension_t * dim
The shape of the buffer.
Definition: HalideRuntime.h:1427
Halide::Runtime::Buffer::as
HALIDE_ALWAYS_INLINE Buffer< T2, D2 > & as() &
Return a typed reference to this Buffer.
Definition: HalideBuffer.h:1054
uint8_t
unsigned __INT8_TYPE__ uint8_t
Definition: runtime_internal.h:25
Halide::Runtime::Buffer::Buffer
friend class Buffer
Give Buffers access to the members of Buffers of different dimensionalities and types.
Definition: HalideBuffer.h:601
Halide::Runtime::Buffer::operator()
const HALIDE_ALWAYS_INLINE not_void_T & operator()() const
Definition: HalideBuffer.h:1893
Halide::Runtime::Buffer::add_dimension_with_stride
void add_dimension_with_stride(int s)
Add a new dimension with a min of zero, an extent of one, and the specified stride.
Definition: HalideBuffer.h:1563
Halide::Runtime::Buffer::dim
HALIDE_ALWAYS_INLINE Dimension dim(int i) const
Access the shape of the buffer.
Definition: HalideBuffer.h:496
Halide::Runtime::Buffer::translate
void translate(const std::vector< int > &delta)
Translate an image along the first N dimensions by changing how it is indexed.
Definition: HalideBuffer.h:1362
Halide::Runtime::Buffer::Buffer
Buffer(int first, int second, Args... rest)
Definition: HalideBuffer.h:852
uint16_t
unsigned __INT16_TYPE__ uint16_t
Definition: runtime_internal.h:23
Halide::Runtime::Buffer::Dimension::Dimension
Dimension(const halide_dimension_t &dim)
Definition: HalideBuffer.h:491
halide_dimension_t::extent
int32_t extent
Definition: HalideRuntime.h:1366
Halide::Runtime::Buffer::begin
T * begin() const
A pointer to the element with the lowest address.
Definition: HalideBuffer.h:563
Halide::Runtime::Buffer::transpose
void transpose(const std::vector< int > &order)
A generalized transpose: instead of swapping two dimensions, pass a vector that lists each dimension ...
Definition: HalideBuffer.h:1433
Halide::Runtime::Buffer::width
int width() const
Conventional names for the first three dimensions.
Definition: HalideBuffer.h:1103
Halide::Runtime::BufferDeviceOwnership::Unmanaged
@ Unmanaged
halide_device_detach_native will be called when device ref count goes to zero
halide_device_interface_t::device_and_host_malloc
int(* device_and_host_malloc)(void *user_context, struct halide_buffer_t *buf, const struct halide_device_interface_t *device_interface)
Definition: HalideRuntime.h:731
Halide::Runtime::Buffer::owns_host_memory
bool owns_host_memory() const
Does this Buffer own the host memory it refers to?
Definition: HalideBuffer.h:177
Halide::Runtime::Buffer::make_interleaved
static Buffer< T, D > make_interleaved(T *data, int width, int height, int channels)
Wrap an existing interleaved image.
Definition: HalideBuffer.h:1749
Halide::min
Expr min(const FuncRef &a, const FuncRef &b)
Explicit overloads of min and max for FuncRef.
Definition: Func.h:577
Halide::Runtime::Buffer::translate
void translate(int d, int delta)
Translate an image in-place along one dimension by changing how it is indexed.
Definition: HalideBuffer.h:1346
halide_device_interface_t::device_malloc
int(* device_malloc)(void *user_context, struct halide_buffer_t *buf, const struct halide_device_interface_t *device_interface)
Definition: HalideRuntime.h:722
Halide::Runtime::Buffer::is_bounds_query
bool is_bounds_query() const
Check if an input buffer passed extern stage is a querying bounds.
Definition: HalideBuffer.h:2354
Halide::Runtime::Buffer::as_const
HALIDE_ALWAYS_INLINE Buffer< typename std::add_const< T >::type, D > & as_const() &
as_const() is syntactic sugar for .as<const T>(), to avoid the need to recapitulate the type argument...
Definition: HalideBuffer.h:1084
Halide::Runtime::Buffer::Buffer
Buffer(int first)
Allocate a new image of the given size.
Definition: HalideBuffer.h:836
Halide::Runtime::Buffer::deallocate
void deallocate()
Drop reference to any owned host or device memory, possibly freeing it, if this buffer held the last ...
Definition: HalideBuffer.h:798
Halide::Runtime::Buffer::assert_can_convert_from
static void assert_can_convert_from(const Buffer< T2, D2 > &other)
Fail an assertion at runtime or compile-time if an Buffer<T, D> cannot be constructed from some other...
Definition: HalideBuffer.h:630
halide_device_interface_t::wrap_native
int(* wrap_native)(void *user_context, struct halide_buffer_t *buf, uint64_t handle, const struct halide_device_interface_t *device_interface)
Definition: HalideRuntime.h:741
Halide::Runtime::Buffer::msan_check_mem_is_initialized
void msan_check_mem_is_initialized(bool entire=false) const
Convenient check to verify that all of the interesting bytes in the Buffer are initialized under MSAN...
Definition: HalideBuffer.h:2363
Halide::Runtime::Buffer::Dimension
Read-only access to the shape.
Definition: HalideBuffer.h:440
Halide::Runtime::Buffer::Buffer
Buffer(halide_type_t t, add_const_if_T_is_const< void > *data, const std::vector< halide_dimension_t > &shape)
Initialize a Buffer from a pointer to the min coordinate and a vector describing the shape.
Definition: HalideBuffer.h:997
Halide::Runtime::Buffer::for_each_element
const HALIDE_ALWAYS_INLINE Buffer< T, D > & for_each_element(Fn &&f) const
Call a function at each site in a buffer.
Definition: HalideBuffer.h:2306
Halide::Runtime::Buffer::operator()
HALIDE_ALWAYS_INLINE not_void_T & operator()(int first, Args... rest)
Definition: HalideBuffer.h:1913
Halide::Runtime::Buffer::sliced
Buffer< T, D > sliced(int d) const
Make a lower-dimensional buffer that refers to one slice of this buffer at the dimension's minimum.
Definition: HalideBuffer.h:1476
Halide::Runtime::Buffer::device_free
int device_free(void *ctx=nullptr)
Definition: HalideBuffer.h:1613
Halide::Runtime::Buffer::set_min
void set_min(const std::vector< int > &mins)
Set the min coordinate of an image in the first N dimensions.
Definition: HalideBuffer.h:1374
Halide::Runtime::Buffer::Dimension::iterator::operator*
int operator*() const
Definition: HalideBuffer.h:469
Halide::Runtime::Buffer::Buffer
Buffer(halide_type_t t, const std::vector< int > &sizes, const std::vector< int > &storage_order)
Allocate a new image of unknown type using a vector of ints as the size and a vector of indices indic...
Definition: HalideBuffer.h:902
Halide::Runtime::Buffer::copy_to_interleaved
Buffer< not_const_T, D > copy_to_interleaved(void *(*allocate_fn)(size_t)=nullptr, void(*deallocate_fn)(void *)=nullptr) const
Like copy(), but the copy is created in interleaved memory layout (vs.
Definition: HalideBuffer.h:1156
Halide::Runtime::Buffer::fill
Buffer< T, D > & fill(not_void_T val)
Definition: HalideBuffer.h:1946
halide_type_t
A runtime tag for a type in the halide type system.
Definition: HalideRuntime.h:426
Halide::Runtime::BufferDeviceOwnership::Cropped
@ Cropped
Call device_and_host_free when DevRefCount goes to zero.
Halide::Runtime::Buffer::set_host_dirty
HALIDE_ALWAYS_INLINE void set_host_dirty(bool v=true)
Methods for managing any GPU allocation.
Definition: HalideBuffer.h:1573
Halide::Runtime::Buffer::contains
bool contains(const std::vector< int > &coords) const
Test if a given coordinate is within the bounds of an image.
Definition: HalideBuffer.h:1390
Halide::Runtime::Buffer::Dimension::extent
HALIDE_ALWAYS_INLINE int extent() const
The extent of the image along this dimension.
Definition: HalideBuffer.h:456
uint64_t
unsigned __INT64_TYPE__ uint64_t
Definition: runtime_internal.h:19
Halide::Runtime::Buffer::top
int top() const
Definition: HalideBuffer.h:1124
Halide::Runtime::AllInts
Definition: HalideBuffer.h:48
Halide::Runtime::Buffer::right
int right() const
Definition: HalideBuffer.h:1120
malloc
void * malloc(size_t)
Halide::Runtime::Buffer::min
int min(int i) const
Access to the mins, strides, extents.
Definition: HalideBuffer.h:503
HALIDE_NEVER_INLINE
#define HALIDE_NEVER_INLINE
Definition: HalideRuntime.h:33
Halide::Runtime::AllocationHeader
A struct acting as a header for allocations owned by the Buffer class itself.
Definition: HalideBuffer.h:80
Halide::Runtime::Buffer::transposed
Buffer< T, D > transposed(const std::vector< int > &order) const
Make a buffer which refers to the same data in the same layout using a different ordering of the dime...
Definition: HalideBuffer.h:1451
Halide::Runtime::Buffer::Dimension::min
HALIDE_ALWAYS_INLINE int min() const
The lowest coordinate in this dimension.
Definition: HalideBuffer.h:445
Halide::Runtime::Buffer::crop
void crop(const std::vector< std::pair< int, int >> &rect)
Crop an image in-place along the first N dimensions.
Definition: HalideBuffer.h:1322
Halide::Runtime::Buffer::device_and_host_free
int device_and_host_free(const struct halide_device_interface_t *device_interface, void *ctx=nullptr)
Definition: HalideBuffer.h:1670
Halide
This file defines the class FunctionDAG, which is our representation of a Halide pipeline,...
Definition: AddAtomicMutex.h:21
Halide::Runtime::Buffer::sliced
Buffer< T, D > sliced(int d, int pos) const
Make a lower-dimensional buffer that refers to one slice of this buffer.
Definition: HalideBuffer.h:1459
Halide::Runtime::Buffer::Buffer
Buffer(Array(&vals)[N])
Make an Buffer that refers to a statically sized array.
Definition: HalideBuffer.h:914
Halide::Runtime::Buffer::copy_to_device
int copy_to_device(const struct halide_device_interface_t *device_interface, void *ctx=nullptr)
Definition: HalideBuffer.h:1602
halide_buffer_t::dimensions
int32_t dimensions
The dimensionality of the buffer.
Definition: HalideRuntime.h:1423
Halide::Runtime::Buffer::host_dirty
bool host_dirty() const
Definition: HalideBuffer.h:1586
halide_device_interface_t::copy_to_device
int(* copy_to_device)(void *user_context, struct halide_buffer_t *buf, const struct halide_device_interface_t *device_interface)
Definition: HalideRuntime.h:729
Halide::LinkageType::Internal
@ Internal
Not visible externally, similar to 'static' linkage in C.
HALIDE_ALLOCA
#define HALIDE_ALLOCA
Definition: HalideBuffer.h:30
Halide::Runtime::Buffer::device_deallocate
void device_deallocate()
Drop reference to any owned device memory, possibly freeing it if this buffer held the last reference...
Definition: HalideBuffer.h:805
Halide::Runtime::Buffer::all_equal
bool all_equal(not_void_T val) const
Tests that all values in this buffer are equal to val.
Definition: HalideBuffer.h:1940
Halide::Runtime::Buffer::has_static_halide_type
static constexpr bool has_static_halide_type
True if the Halide type is not void (or const void).
Definition: HalideBuffer.h:168
Halide::Runtime::Buffer::allocate
void allocate(void *(*allocate_fn)(size_t)=nullptr, void(*deallocate_fn)(void *)=nullptr)
Allocate memory for this Buffer.
Definition: HalideBuffer.h:771
Halide::Runtime::Buffer::operator()
HALIDE_ALWAYS_INLINE not_void_T & operator()()
Definition: HalideBuffer.h:1922
Halide::Runtime::Buffer::cropped
Buffer< T, D > cropped(int d, int min, int extent) const
Make an image that refers to a sub-range of this image along the given dimension.
Definition: HalideBuffer.h:1265
Halide::Runtime::Buffer::Buffer
Buffer(const Buffer< T, D > &other)
Copy constructor.
Definition: HalideBuffer.h:639
Halide::Buffer
A Halide::Buffer is a named shared reference to a Halide::Runtime::Buffer.
Definition: Argument.h:16
Halide::Runtime::Buffer::make_interleaved
static Buffer< T, D > make_interleaved(int width, int height, int channels)
If you use the (x, y, c) indexing convention, then Halide Buffers are stored planar by default.
Definition: HalideBuffer.h:1735
Halide::Runtime::Buffer::Buffer
Buffer(T *data, int d, const halide_dimension_t *shape)
Initialize an Buffer from a pointer to the min coordinate and an array describing the shape.
Definition: HalideBuffer.h:1005
Halide::Runtime::Buffer::operator=
Buffer< T, D > & operator=(const Buffer< T, D > &other)
Standard assignment operator.
Definition: HalideBuffer.h:709
Halide::Runtime::Buffer::static_halide_type
static halide_type_t static_halide_type()
Get the Halide type of T.
Definition: HalideBuffer.h:172
Halide::Runtime::Buffer::Buffer
Buffer(const std::vector< int > &sizes, const std::vector< int > &storage_order)
Definition: HalideBuffer.h:907
Halide::Runtime::AllocationHeader::AllocationHeader
AllocationHeader(void(*deallocate_fn)(void *))
Definition: HalideBuffer.h:85
Halide::Runtime::Buffer::for_each_value
HALIDE_ALWAYS_INLINE Buffer< T, D > & for_each_value(Fn &&f, Args &&... other_buffers)
Definition: HalideBuffer.h:2115
Halide::Runtime::Buffer
A templated Buffer class that wraps halide_buffer_t and adds functionality.
Definition: HalideBuffer.h:43
Halide::Runtime::Buffer::Dimension::stride
HALIDE_ALWAYS_INLINE int stride() const
The number of elements in memory you have to step over to increment this coordinate by one.
Definition: HalideBuffer.h:451
HALIDE_ALWAYS_INLINE
#define HALIDE_ALWAYS_INLINE
Definition: HalideRuntime.h:32
Halide::Runtime::Buffer::transposed
Buffer< T, D > transposed(int d1, int d2) const
Make a buffer which refers to the same data in the same layout using a swapped indexing order for the...
Definition: HalideBuffer.h:1410
Halide::Runtime::Buffer::height
int height() const
Definition: HalideBuffer.h:1106
Halide::Runtime::Buffer::Dimension::begin
HALIDE_ALWAYS_INLINE iterator begin() const
An iterator that points to the min coordinate.
Definition: HalideBuffer.h:482
Halide::Runtime::Buffer::ElemType
T ElemType
Definition: HalideBuffer.h:437
Halide::Runtime::Buffer::stride
int stride(int i) const
Definition: HalideBuffer.h:509
Halide::Runtime::Buffer::embedded
Buffer< T, D > embedded(int d, int pos=0) const
Make a new buffer that views this buffer as a single slice in a higher-dimensional space.
Definition: HalideBuffer.h:1511
Halide::Runtime::Buffer::alias
Buffer< T, D > alias() const
Make a copy of the Buffer which shares the underlying host and/or device allocations as the existing ...
Definition: HalideBuffer.h:1195
Halide::Runtime::Buffer::set_device_dirty
void set_device_dirty(bool v=true)
Definition: HalideBuffer.h:1590
Halide::Runtime::BufferDeviceOwnership::Allocated
@ Allocated
Halide::Runtime::Buffer::device_ownership
BufferDeviceOwnership device_ownership() const
Return the method by which the device field is managed.
Definition: HalideBuffer.h:1706
Halide::Runtime::Buffer::Buffer
Buffer(halide_type_t t, add_const_if_T_is_const< void > *data, const std::vector< int > &sizes)
Initialize an Buffer of runtime type from a pointer and a vector of sizes.
Definition: HalideBuffer.h:969
Halide::Runtime::BufferDeviceOwnership
BufferDeviceOwnership
This indicates how to deallocate the device for a Halide::Runtime::Buffer.
Definition: HalideBuffer.h:91
Halide::Runtime::Buffer::Buffer
Buffer(halide_type_t t, add_const_if_T_is_const< void > *data, int first, Args &&... rest)
Initialize an Buffer of runtime type from a pointer and some sizes.
Definition: HalideBuffer.h:928
Halide::Runtime::Buffer::device_dirty
HALIDE_ALWAYS_INLINE bool device_dirty() const
Definition: HalideBuffer.h:1582
halide_buffer_t
struct halide_buffer_t halide_buffer_t
The raw representation of an image passed around by generated Halide code.
Halide::Runtime::Buffer::copy
Buffer< not_const_T, D > copy(void *(*allocate_fn)(size_t)=nullptr, void(*deallocate_fn)(void *)=nullptr) const
Make a new image which is a deep copy of this image.
Definition: HalideBuffer.h:1145
Halide::Runtime::Buffer::Dimension::max
HALIDE_ALWAYS_INLINE int max() const
The highest coordinate in this dimension.
Definition: HalideBuffer.h:461
Halide::Runtime::Buffer::add_dimension
void add_dimension()
Add a new dimension with a min of zero and an extent of one.
Definition: HalideBuffer.h:1532
Halide::Runtime::Buffer::embed
void embed(int d, int pos=0)
Embed a buffer in-place, increasing the dimensionality.
Definition: HalideBuffer.h:1519
Halide::Runtime::Buffer::cropped
Buffer< T, D > cropped(const std::vector< std::pair< int, int >> &rect) const
Make an image that refers to a sub-rectangle of this image along the first N dimensions.
Definition: HalideBuffer.h:1301
Halide::Runtime::Buffer::operator()
const HALIDE_ALWAYS_INLINE not_void_T & operator()(int first, Args... rest) const
Access elements.
Definition: HalideBuffer.h:1884
Halide::Runtime::Buffer::transpose
void transpose(int d1, int d2)
Transpose a buffer in-place by changing how it is indexed.
Definition: HalideBuffer.h:1423
Halide::Runtime::Buffer::Buffer
Buffer(const Buffer< T2, D2 > &other)
Construct a Buffer from a Buffer of different dimensionality and type.
Definition: HalideBuffer.h:654
Halide::Runtime::Buffer::operator=
Buffer< T, D > & operator=(Buffer< T2, D2 > &&other)
Move from another Buffer of possibly-different dimensionality and type.
Definition: HalideBuffer.h:727
Halide::Runtime::Buffer::type
halide_type_t type() const
Get the type of the elements.
Definition: HalideBuffer.h:530
Halide::Runtime::Buffer::has_device_allocation
bool has_device_allocation() const
Definition: HalideBuffer.h:1701
Halide::Runtime::Buffer::operator()
HALIDE_ALWAYS_INLINE not_void_T & operator()(const int *pos)
Definition: HalideBuffer.h:1931
Halide::Runtime::Buffer::check_overflow
void check_overflow()
Check the product of the extents fits in memory.
Definition: HalideBuffer.h:756
Halide::Runtime::Buffer::Buffer
Buffer(Buffer< T2, D2 > &&other)
Move-construct a Buffer from a Buffer of different dimensionality and type.
Definition: HalideBuffer.h:678
Halide::Runtime::Buffer::Dimension::iterator::operator++
iterator & operator++()
Definition: HalideBuffer.h:475
Halide::Runtime::Buffer::make_interleaved
static Buffer< void, D > make_interleaved(halide_type_t t, int width, int height, int channels)
If you use the (x, y, c) indexing convention, then Halide Buffers are stored planar by default.
Definition: HalideBuffer.h:1720
Halide::Runtime::Buffer::operator=
Buffer< T, D > & operator=(const Buffer< T2, D2 > &other)
Assign from another Buffer of possibly-different dimensionality and type.
Definition: HalideBuffer.h:693
Halide::Runtime::Buffer::Dimension::end
HALIDE_ALWAYS_INLINE iterator end() const
An iterator that points to one past the max coordinate.
Definition: HalideBuffer.h:487
Halide::Runtime::DeviceRefCount::count
std::atomic< int > count
Definition: HalideBuffer.h:103
Halide::Runtime::Buffer::crop
void crop(int d, int min, int extent)
Crop an image in-place along the given dimension.
Definition: HalideBuffer.h:1285
Halide::Runtime::Buffer::Buffer
Buffer(const halide_buffer_t &buf, BufferDeviceOwnership ownership=BufferDeviceOwnership::Unmanaged)
Make a Buffer from a halide_buffer_t.
Definition: HalideBuffer.h:593
Halide::Runtime::Buffer::make_with_shape_of
static Buffer< T, D > make_with_shape_of(Buffer< T2, D2 > src, void *(*allocate_fn)(size_t)=nullptr, void(*deallocate_fn)(void *)=nullptr)
Make a buffer with the same shape and memory nesting order as another buffer.
Definition: HalideBuffer.h:1777
Halide::Runtime::Buffer::make_interleaved
static Buffer< add_const_if_T_is_const< void >, D > make_interleaved(halide_type_t t, T *data, int width, int height, int channels)
Wrap an existing interleaved image.
Definition: HalideBuffer.h:1741
Halide::Runtime::AllocationHeader::deallocate_fn
void(* deallocate_fn)(void *)
Definition: HalideBuffer.h:81
Halide::Runtime::Buffer::Dimension::iterator::val
int val
Definition: HalideBuffer.h:468
Halide::Runtime::Buffer::~Buffer
~Buffer()
Destructor.
Definition: HalideBuffer.h:1024
Halide::Runtime::Buffer::left
int left() const
Conventional names for the min and max value of each dimension.
Definition: HalideBuffer.h:1116
Halide::Runtime::Buffer::bottom
int bottom() const
Definition: HalideBuffer.h:1128
Halide::Runtime::Buffer::as
HALIDE_ALWAYS_INLINE Buffer< T2, D2 > as() &&
Returns this rval Buffer with a different type attached.
Definition: HalideBuffer.h:1075
Halide::Runtime::Buffer::contains
bool contains(Args... args) const
Definition: HalideBuffer.h:1401
Halide::Runtime::Buffer::make_scalar
static Buffer< T, D > make_scalar()
Make a zero-dimensional Buffer.
Definition: HalideBuffer.h:1761
Halide::Runtime::Buffer::can_convert_from
static bool can_convert_from(const Buffer< T2, D2 > &other)
Determine if if an Buffer<T, D> can be constructed from some other Buffer type.
Definition: HalideBuffer.h:619
Halide::Runtime::Buffer::operator=
Buffer< T, D > & operator=(Buffer< T, D > &&other) noexcept
Standard move-assignment operator.
Definition: HalideBuffer.h:742
Halide::Runtime::Buffer::translated
Buffer< T, D > translated(const std::vector< int > &delta) const
Make an image which refers to the same data translated along the first N dimensions.
Definition: HalideBuffer.h:1354
dst
char * dst
Definition: printer.h:32
Halide::Runtime::Buffer::Buffer
Buffer(Buffer< T, D > &&other) noexcept
Move constructor.
Definition: HalideBuffer.h:664
Halide::Runtime::Buffer::Dimension::iterator::operator!=
bool operator!=(const iterator &other) const
Definition: HalideBuffer.h:472
HalideRuntime.h
Halide::Runtime::Buffer::slice
void slice(int d)
Slice a buffer in-place at the dimension's minimum.
Definition: HalideBuffer.h:1497
memcpy
void * memcpy(void *s1, const void *s2, size_t n)
Halide::Runtime::BufferDeviceOwnership::AllocatedDeviceAndHost
@ AllocatedDeviceAndHost
No free routine will be called when device ref count goes to zero
free
void free(void *)
Halide::Runtime::Buffer::device_wrap_native
int device_wrap_native(const struct halide_device_interface_t *device_interface, uint64_t handle, void *ctx=nullptr)
Definition: HalideBuffer.h:1636
halide_buffer_t
The raw representation of an image passed around by generated Halide code.
Definition: HalideRuntime.h:1404
Halide::Runtime::Buffer::end
T * end() const
A pointer to one beyond the element with the highest address.
Definition: HalideBuffer.h:569
Halide::Runtime::Buffer::device_sync
int device_sync(void *ctx=nullptr)
Definition: HalideBuffer.h:1693
Halide::Runtime::Buffer::Buffer
Buffer()
Definition: HalideBuffer.h:586
halide_dimension_t::stride
int32_t stride
Definition: HalideRuntime.h:1366
Halide::Runtime::Buffer::data
T * data() const
Get a pointer to the address of the min coordinate.
Definition: HalideBuffer.h:1871
Halide::Runtime::Buffer::copy_to_host
int copy_to_host(void *ctx=nullptr)
Definition: HalideBuffer.h:1595
Halide::Runtime::Buffer::make_scalar
static Buffer< T, D > make_scalar(T *data)
Make a zero-dimensional Buffer that points to non-owned, existing data.
Definition: HalideBuffer.h:1768
buf
char * buf
Definition: printer.h:32
halide_device_interface_t
Each GPU API provides a halide_device_interface_t struct pointing to the code that manages device all...
Definition: HalideRuntime.h:721
Halide::Runtime::Buffer::set_min
void set_min(Args... args)
Definition: HalideBuffer.h:1383
Halide::Runtime::Buffer::for_each_value
const HALIDE_ALWAYS_INLINE Buffer< T, D > & for_each_value(Fn &&f, Args &&... other_buffers) const
Call a function on every value in the buffer, and the corresponding values in some number of other bu...
Definition: HalideBuffer.h:2107
Halide::Runtime::Buffer::for_each_element
HALIDE_ALWAYS_INLINE Buffer< T, D > & for_each_element(Fn &&f)
Definition: HalideBuffer.h:2314
Halide::Runtime::BufferDeviceOwnership::WrappedNative
@ WrappedNative
halide_device_free will be called when device ref count goes to zero
Halide::Runtime::Buffer::reset
void reset()
Reset the Buffer to be equivalent to a default-constructed Buffer of the same static type (if any); B...
Definition: HalideBuffer.h:582
Halide::Runtime::AllocationHeader::ref_count
std::atomic< int > ref_count
Definition: HalideBuffer.h:82
Halide::Runtime::Buffer::device_malloc
int device_malloc(const struct halide_device_interface_t *device_interface, void *ctx=nullptr)
Definition: HalideBuffer.h:1609
uint32_t
unsigned __INT32_TYPE__ uint32_t
Definition: runtime_internal.h:21
Halide::Runtime::Buffer::fill
Buffer< T, D > & fill(Fn &&f)
Fill a buffer by evaluating a callable at every site.
Definition: HalideBuffer.h:2344
Halide::Runtime::Buffer::as
const HALIDE_ALWAYS_INLINE Buffer< T2, D2 > & as() const &
Return a const typed reference to this Buffer.
Definition: HalideBuffer.h:1065
Halide::Runtime::Buffer::extent
int extent(int i) const
Definition: HalideBuffer.h:506
Halide::max
Expr max(const FuncRef &a, const FuncRef &b)
Definition: Func.h:580
Halide::Runtime::Buffer::make_scalar
static Buffer< add_const_if_T_is_const< void >, D > make_scalar(halide_type_t t)
Make a zero-dimensional Buffer.
Definition: HalideBuffer.h:1754
Halide::Runtime::Buffer::Buffer
Buffer(halide_type_t t, const std::vector< int > &sizes)
Allocate a new image of unknown type using a vector of ints as the size.
Definition: HalideBuffer.h:868
Halide::Runtime::Buffer::Buffer
Buffer(T *data, int first, Args &&... rest)
Initialize an Buffer from a pointer and some sizes.
Definition: HalideBuffer.h:945
Halide::Runtime::Buffer::as_const
HALIDE_ALWAYS_INLINE Buffer< typename std::add_const< T >::type, D > as_const() &&
Definition: HalideBuffer.h:1096
Halide::Runtime::Buffer::copy_to_planar
Buffer< not_const_T, D > copy_to_planar(void *(*allocate_fn)(size_t)=nullptr, void(*deallocate_fn)(void *)=nullptr) const
Like copy(), but the copy is created in planar memory layout (vs.
Definition: HalideBuffer.h:1169
Halide::Runtime::DeviceRefCount::ownership
BufferDeviceOwnership ownership
Definition: HalideBuffer.h:104
halide_dimension_t::min
int32_t min
Definition: HalideRuntime.h:1366
Halide::Runtime::Buffer::channels
int channels() const
Definition: HalideBuffer.h:1109
ptrdiff_t
__PTRDIFF_TYPE__ ptrdiff_t
Definition: runtime_internal.h:28
Halide::Runtime::Buffer::as_const
const HALIDE_ALWAYS_INLINE Buffer< typename std::add_const< T >::type, D > & as_const() const &
Definition: HalideBuffer.h:1091
Halide::Runtime::Buffer::Buffer
Buffer(halide_type_t t, add_const_if_T_is_const< void > *data, int d, const halide_dimension_t *shape)
Initialize an Buffer from a pointer to the min coordinate and an array describing the shape.
Definition: HalideBuffer.h:982
Halide::Runtime::Buffer::Buffer
Buffer(const std::vector< int > &sizes)
Allocate a new image of known type using a vector of ints as the size.
Definition: HalideBuffer.h:882
halide_buffer_t::device
uint64_t device
A device-handle for e.g.
Definition: HalideRuntime.h:1406
Halide::Runtime::Buffer::Buffer
Buffer(halide_type_t t, int first, Args... rest)
Allocate a new image of the given size with a runtime type.
Definition: HalideBuffer.h:815
Halide::Runtime::Buffer::Buffer
Buffer(T *data, const std::vector< halide_dimension_t > &shape)
Initialize a Buffer from a pointer to the min coordinate and a vector describing the shape.
Definition: HalideBuffer.h:1017
Halide::Runtime::Buffer::raw_buffer
halide_buffer_t * raw_buffer()
Get a pointer to the raw halide_buffer_t this wraps.
Definition: HalideBuffer.h:1031
Halide::Runtime::Buffer::copy_from
void copy_from(const Buffer< T2, D2 > &other)
Fill a Buffer with the values at the same coordinates in another Buffer.
Definition: HalideBuffer.h:1209
Halide::Runtime::DeviceRefCount
A similar struct for managing device allocations.
Definition: HalideBuffer.h:100
Halide::Runtime::Internal::any_zero
bool any_zero(const Container &c)
Definition: HalideBuffer.h:70
Halide::Runtime::Buffer::translated
Buffer< T, D > translated(int d, int dx) const
Make an image which refers to the same data with using translated coordinates in the given dimension.
Definition: HalideBuffer.h:1338
Halide::Runtime::Buffer::number_of_elements
size_t number_of_elements() const
The total number of elements this buffer represents.
Definition: HalideBuffer.h:516