HalideBuffer.h
1 /** \file
2  * Defines a Buffer type that wraps halide_buffer_t and adds
3  * functionality, and methods for more conveniently iterating over the
4  * samples in a halide_buffer_t outside of Halide code. */
5 
6 #ifndef HALIDE_RUNTIME_BUFFER_H
7 #define HALIDE_RUNTIME_BUFFER_H
8 
9 #include <memory>
10 #include <vector>
11 #include <cassert>
12 #include <atomic>
13 #include <algorithm>
14 #include <limits>
15 #include <stdint.h>
16 #include <string.h>
17 
18 #include "HalideRuntime.h"
19 
20 #ifdef _MSC_VER
21 #define HALIDE_ALLOCA _alloca
22 #else
23 #define HALIDE_ALLOCA __builtin_alloca
24 #endif
25 
26 // gcc 5.1 has a false positive warning on this code
27 #if __GNUC__ == 5 && __GNUC_MINOR__ == 1
28 #pragma GCC diagnostic ignored "-Warray-bounds"
29 #endif
30 
31 namespace Halide {
32 namespace Runtime {
33 
34 // Forward-declare our Buffer class
35 template<typename T, int D> class Buffer;
36 
37 // A helper to check if a parameter pack is entirely implicitly
38 // int-convertible to use with std::enable_if
39 template<typename ...Args>
40 struct AllInts : std::false_type {};
41 
42 template<>
43 struct AllInts<> : std::true_type {};
44 
45 template<typename T, typename ...Args>
46 struct AllInts<T, Args...> {
47  static const bool value = std::is_convertible<T, int>::value && AllInts<Args...>::value;
48 };
49 
50 // Floats and doubles are technically implicitly int-convertible, but
51 // doing so produces a warning we treat as an error, so just disallow
52 // it here.
53 template<typename ...Args>
54 struct AllInts<float, Args...> : std::false_type {};
55 
56 template<typename ...Args>
57 struct AllInts<double, Args...> : std::false_type {};
58 
59 /** A struct acting as a header for allocations owned by the Buffer
60  * class itself. */
61 struct AllocationHeader {
62  void (*deallocate_fn)(void *);
63  std::atomic<int> ref_count {0};
64 };
65 
66 /** This indicates how to deallocate the device allocation for a Halide::Runtime::Buffer. */
67 enum struct BufferDeviceOwnership : int {
68  Allocated, ///< halide_device_free will be called when device ref count goes to zero
69  WrappedNative, ///< halide_device_detach_native will be called when device ref count goes to zero
70  Unmanaged, ///< No free routine will be called when device ref count goes to zero
71  AllocatedDeviceAndHost, ///< Call device_and_host_free when the device ref count goes to zero.
72 };
73 
74 /** A similar struct for managing device allocations. */
75 struct DeviceRefCount {
76  // This is only ever constructed when there's something to manage,
77  // so start at one.
78  std::atomic<int> count {1};
79  BufferDeviceOwnership ownership{BufferDeviceOwnership::Allocated};
80 };
81 
82 /** A templated Buffer class that wraps halide_buffer_t and adds
83  * functionality. When using Halide from C++, this is the preferred
84  * way to create input and output buffers. The overhead of using this
85  * class relative to a naked halide_buffer_t is minimal - it uses another
86  * ~16 bytes on the stack, and does no dynamic allocations when using
87  * it to represent existing memory of a known maximum dimensionality.
88  *
89  * The template parameter T is the element type. For buffers where the
90  * element type is unknown, or may vary, use void or const void.
91  *
92  * D is the maximum number of dimensions that can be represented using
93  * space inside the class itself. Set it to the maximum dimensionality
94  * you expect this buffer to be. If the actual dimensionality exceeds
95  * this, heap storage is allocated to track the shape of the buffer. D
96  * defaults to 4, which should cover nearly all usage.
97  *
98  * The class optionally allocates and owns memory for the image using
99  * a shared pointer allocated with the provided allocator. If the allocator
100  * functions are null, malloc and free are used. Any device-side allocation is
101  * considered owned if and only if the host-side allocation is
102  * owned. */
103 template<typename T = void, int D = 4>
104 class Buffer {
105  /** The underlying buffer_t */
106  halide_buffer_t buf = {0};
107 
108  /** Some in-class storage for shape of the dimensions. */
109  halide_dimension_t shape[D];
110 
111  /** The allocation owned by this Buffer. NULL if the Buffer does not
112  * own the memory. */
113  AllocationHeader *alloc = nullptr;
114 
115  /** A reference count for the device allocation owned by this
116  * buffer. */
117  mutable DeviceRefCount *dev_ref_count = nullptr;
118 
119  /** True if T is of type void or const void */
120  static const bool T_is_void = std::is_same<typename std::remove_const<T>::type, void>::value;
121 
122  /** A type function that adds a const qualifier if T is a const type. */
123  template<typename T2>
124  using add_const_if_T_is_const = typename std::conditional<std::is_const<T>::value, const T2, T2>::type;
125 
126  /** T unless T is (const) void, in which case (const)
127  * uint8_t. Useful for providing return types for operator() */
128  using not_void_T = typename std::conditional<T_is_void,
129  add_const_if_T_is_const<uint8_t>,
130  T>::type;
131 
132  /** The type the elements are stored as. Equal to not_void_T
133  * unless T is a pointer, in which case uint64_t. Halide stores
134  * all pointer types as uint64s internally, even on 32-bit
135  * systems. */
136  using storage_T = typename std::conditional<std::is_pointer<T>::value, uint64_t, not_void_T>::type;
137 
138 public:
139  /** True if the Halide type is not void (or const void). */
140  static constexpr bool has_static_halide_type = !T_is_void;
141 
142  /** Get the Halide type of T. Callers should not use the result if
143  * has_static_halide_type is false. */
144  static halide_type_t static_halide_type() {
145  return halide_type_of<typename std::remove_cv<not_void_T>::type>();
146  }
147 
148  /** Does this Buffer own the host memory it refers to? */
149  bool owns_host_memory() const {
150  return alloc != nullptr;
151  }
152 
153 private:
154  /** Increment the reference count of any owned allocation */
155  void incref() const {
156  if (owns_host_memory()) {
157  alloc->ref_count++;
158  }
159  if (buf.device) {
160  if (!dev_ref_count) {
161  // I seem to have a non-zero dev field but no
162  // reference count for it. I must have been given a
163  // device allocation by a Halide pipeline, and have
164  // never been copied from since. Take sole ownership
165  // of it.
166  dev_ref_count = new DeviceRefCount;
167  }
168  dev_ref_count->count++;
169  }
170  }
171 
172  /** Decrement the reference count of any owned allocation and free host
173  * and device memory if it hits zero. Sets alloc to nullptr. */
174  void decref() {
175  if (owns_host_memory()) {
176  int new_count = --(alloc->ref_count);
177  if (new_count == 0) {
178  void (*fn)(void *) = alloc->deallocate_fn;
179  fn(alloc);
180  }
181  buf.host = nullptr;
182  alloc = nullptr;
183  set_host_dirty(false);
184  }
185  decref_dev();
186  }
187 
188  void decref_dev() {
189  int new_count = 0;
190  if (dev_ref_count) {
191  new_count = --(dev_ref_count->count);
192  }
193  if (new_count == 0) {
194  if (buf.device) {
195  assert(!(alloc && device_dirty()) &&
196  "Implicitly freeing a dirty device allocation while a host allocation still lives. "
197  "Call device_free explicitly if you want to drop dirty device-side data. "
198  "Call copy_to_host explicitly if you want the data copied to the host allocation "
199  "before the device allocation is freed.");
200  if (dev_ref_count && dev_ref_count->ownership == BufferDeviceOwnership::WrappedNative) {
201  buf.device_interface->detach_native(nullptr, &buf);
202  } else if (dev_ref_count && dev_ref_count->ownership == BufferDeviceOwnership::AllocatedDeviceAndHost) {
203  buf.device_interface->device_and_host_free(nullptr, &buf);
204  } else if (dev_ref_count == nullptr || dev_ref_count->ownership == BufferDeviceOwnership::Allocated) {
205  buf.device_interface->device_free(nullptr, &buf);
206  }
207  }
208  if (dev_ref_count) {
209  delete dev_ref_count;
210  }
211  }
212  buf.device = 0;
213  buf.device_interface = nullptr;
214  dev_ref_count = nullptr;
215  }
216 
217  void free_shape_storage() {
218  if (buf.dim != shape) {
219  delete[] buf.dim;
220  buf.dim = nullptr;
221  }
222  }
223 
224  void make_shape_storage() {
225  if (buf.dimensions <= D) {
226  buf.dim = shape;
227  } else {
228  buf.dim = new halide_dimension_t[buf.dimensions];
229  }
230  }
231 
232  void copy_shape_from(const halide_buffer_t &other) {
233  // All callers of this ensure that buf.dimensions == other.dimensions.
234  make_shape_storage();
235  for (int i = 0; i < buf.dimensions; i++) {
236  buf.dim[i] = other.dim[i];
237  }
238  }
239 
240  template<typename T2, int D2>
241  void move_shape_from(Buffer<T2, D2> &&other) {
242  if (other.shape == other.buf.dim) {
243  copy_shape_from(other.buf);
244  } else {
245  buf.dim = other.buf.dim;
246  other.buf.dim = nullptr;
247  }
248  }
249 
250  /** Initialize the shape from a halide_buffer_t. */
251  void initialize_from_buffer(const halide_buffer_t &b,
252  BufferDeviceOwnership ownership) {
253  memcpy(&buf, &b, sizeof(halide_buffer_t));
254  copy_shape_from(b);
255  if (b.device) {
256  dev_ref_count = new DeviceRefCount;
257  dev_ref_count->ownership = ownership;
258  }
259  }
260 
261  /** Initialize the shape from a parameter pack of ints */
262  template<typename ...Args>
263  void initialize_shape(int next, int first, Args... rest) {
264  buf.dim[next].min = 0;
265  buf.dim[next].extent = first;
266  if (next == 0) {
267  buf.dim[next].stride = 1;
268  } else {
269  buf.dim[next].stride = buf.dim[next-1].stride * buf.dim[next-1].extent;
270  }
271  initialize_shape(next + 1, rest...);
272  }
273 
274  /** Base case for the template recursion above. */
275  void initialize_shape(int) {
276  }
277 
278  /** Initialize the shape from a vector of extents */
279  void initialize_shape(const std::vector<int> &sizes) {
280  assert(sizes.size() <= std::numeric_limits<int>::max());
281  int limit = (int)sizes.size();
282  assert(limit <= dimensions());
283  for (int i = 0; i < limit; i++) {
284  buf.dim[i].min = 0;
285  buf.dim[i].extent = sizes[i];
286  if (i == 0) {
287  buf.dim[i].stride = 1;
288  } else {
289  buf.dim[i].stride = buf.dim[i-1].stride * buf.dim[i-1].extent;
290  }
291  }
292  }
293 
294  /** Initialize the shape from the static shape of an array */
295  template<typename Array, size_t N>
296  void initialize_shape_from_array_shape(int next, Array (&vals)[N]) {
297  buf.dim[next].min = 0;
298  buf.dim[next].extent = (int)N;
299  if (next == 0) {
300  buf.dim[next].stride = 1;
301  } else {
302  initialize_shape_from_array_shape(next - 1, vals[0]);
303  buf.dim[next].stride = buf.dim[next - 1].stride * buf.dim[next - 1].extent;
304  }
305  }
306 
307  /** Base case for the template recursion above. */
308  template<typename T2>
309  void initialize_shape_from_array_shape(int, const T2 &) {
310  }
311 
312  /** Get the dimensionality of a multi-dimensional C array */
313  template<typename Array, size_t N>
314  static int dimensionality_of_array(Array (&vals)[N]) {
315  return dimensionality_of_array(vals[0]) + 1;
316  }
317 
318  template<typename T2>
319  static int dimensionality_of_array(const T2 &) {
320  return 0;
321  }
322 
323  /** Get the underlying halide_type_t of an array's element type. */
324  template<typename Array, size_t N>
325  static halide_type_t scalar_type_of_array(Array (&vals)[N]) {
326  return scalar_type_of_array(vals[0]);
327  }
328 
329  template<typename T2>
330  static halide_type_t scalar_type_of_array(const T2 &) {
331  return halide_type_of<typename std::remove_cv<T2>::type>();
332  }
333 
334  /** Check if any args in a parameter pack are zero */
335  template<typename ...Args>
336  static bool any_zero(int first, Args... rest) {
337  if (first == 0) return true;
338  return any_zero(rest...);
339  }
340 
341  static bool any_zero() {
342  return false;
343  }
344 
345  static bool any_zero(const std::vector<int> &v) {
346  for (int i : v) {
347  if (i == 0) return true;
348  }
349  return false;
350  }
351 
352 public:
353 
354  typedef T ElemType;
355 
356  /** Read-only access to the shape */
357  class Dimension {
358  const halide_dimension_t &d;
359  public:
360  /** The lowest coordinate in this dimension */
361  HALIDE_ALWAYS_INLINE int min() const {
362  return d.min;
363  }
364 
365  /** The number of elements in memory you have to step over to
366  * increment this coordinate by one. */
367  HALIDE_ALWAYS_INLINE int stride() const {
368  return d.stride;
369  }
370 
371  /** The extent of the image along this dimension */
372  HALIDE_ALWAYS_INLINE int extent() const {
373  return d.extent;
374  }
375 
376  /** The highest coordinate in this dimension */
377  HALIDE_ALWAYS_INLINE int max() const {
378  return min() + extent() - 1;
379  }
380 
381  /** An iterator class, so that you can iterate over
382  * coordinates in a dimension using a range-based for loop. */
383  struct iterator {
384  int val;
385  int operator*() const {return val;}
386  bool operator!=(const iterator &other) const {return val != other.val;}
387  iterator &operator++() {val++; return *this;}
388  };
389 
390  /** An iterator that points to the min coordinate */
391  HALIDE_ALWAYS_INLINE iterator begin() const {
392  return {min()};
393  }
394 
395  /** An iterator that points to one past the max coordinate */
396  HALIDE_ALWAYS_INLINE iterator end() const {
397  return {min() + extent()};
398  }
399 
400  Dimension(const halide_dimension_t &dim) : d(dim) {};
401  };
402 
403  /** Access the shape of the buffer */
404  Dimension dim(int i) const {
405  return Dimension(buf.dim[i]);
406  }
407 
408  /** Access to the mins, strides, extents. Will be deprecated. Do not use. */
409  // @{
410  int min(int i) const { return dim(i).min(); }
411  int extent(int i) const { return dim(i).extent(); }
412  int stride(int i) const { return dim(i).stride(); }
413  // @}
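 // Example (illustrative, not part of this header): Dimension's begin/end
 // iterators support range-based for loops over the coordinates of a
 // dimension. Assuming a two-dimensional Buffer<float> named "im":
 //
 //     for (int y : im.dim(1)) {
 //         for (int x : im.dim(0)) {
 //             im(x, y) = x + y;
 //         }
 //     }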
414 
415  /** The total number of elements this buffer represents. Equal to
416  * the product of the extents */
417  size_t number_of_elements() const {
418  size_t s = 1;
419  for (int i = 0; i < dimensions(); i++) {
420  s *= dim(i).extent();
421  }
422  return s;
423  }
424 
425  /** Get the dimensionality of the buffer. */
426  int dimensions() const {
427  return buf.dimensions;
428  }
429 
430  /** Get the type of the elements. */
431  halide_type_t type() const {
432  return buf.type;
433  }
434 
435  /** A pointer to the element with the lowest address. If all
436  * strides are positive, equal to the host pointer. */
437  T *begin() const {
438  ptrdiff_t index = 0;
439  for (int i = 0; i < dimensions(); i++) {
440  if (dim(i).stride() < 0) {
441  index += dim(i).stride() * (dim(i).extent() - 1);
442  }
443  }
444  return (T *)(buf.host + index * type().bytes());
445  }
446 
447  /** A pointer to one beyond the element with the highest address. */
448  T *end() const {
449  ptrdiff_t index = 0;
450  for (int i = 0; i < dimensions(); i++) {
451  if (dim(i).stride() > 0) {
452  index += dim(i).stride() * (dim(i).extent() - 1);
453  }
454  }
455  index += 1;
456  return (T *)(buf.host + index * type().bytes());
457  }
458 
459  /** The total number of bytes spanned by the data in memory. */
460  size_t size_in_bytes() const {
461  return (size_t)((const uint8_t *)end() - (const uint8_t *)begin());
462  }
463 
464  Buffer() {
465  buf.type = static_halide_type();
466  make_shape_storage();
467  }
468 
469  /** Make a Buffer from a halide_buffer_t */
470  Buffer(const halide_buffer_t &buf,
471  BufferDeviceOwnership ownership = BufferDeviceOwnership::Unmanaged) {
472  assert(T_is_void || buf.type == static_halide_type());
473  initialize_from_buffer(buf, ownership);
474  }
475 
476  /** Make a Buffer from a legacy buffer_t. */
477  Buffer(const buffer_t &old_buf) {
478  assert(!T_is_void && old_buf.elem_size == static_halide_type().bytes());
479  buf.host = old_buf.host;
480  buf.type = static_halide_type();
481  int d;
482  for (d = 0; d < 4 && old_buf.extent[d]; d++);
483  buf.dimensions = d;
484  make_shape_storage();
485  for (int i = 0; i < d; i++) {
486  buf.dim[i].min = old_buf.min[i];
487  buf.dim[i].extent = old_buf.extent[i];
488  buf.dim[i].stride = old_buf.stride[i];
489  }
490  buf.set_host_dirty(old_buf.host_dirty);
491  assert(old_buf.dev == 0 && "Cannot construct a Halide::Runtime::Buffer from a legacy buffer_t with a device allocation. Use halide_upgrade_buffer_t to upgrade it to a halide_buffer_t first.");
492  }
493 
494  /** Populate the fields of a legacy buffer_t using this
495  * Buffer. Does not copy device metadata. */
496  buffer_t make_legacy_buffer_t() const {
497  buffer_t old_buf = {0};
498  assert(!has_device_allocation() && "Cannot construct a legacy buffer_t from a Halide::Runtime::Buffer with a device allocation. Use halide_downgrade_buffer_t instead.");
499  old_buf.host = buf.host;
500  old_buf.elem_size = buf.type.bytes();
501  assert(dimensions() <= 4 && "Cannot construct a legacy buffer_t from a Halide::Runtime::Buffer with more than four dimensions.");
502  for (int i = 0; i < dimensions(); i++) {
503  old_buf.min[i] = dim(i).min();
504  old_buf.extent[i] = dim(i).extent();
505  old_buf.stride[i] = dim(i).stride();
506  }
507  return old_buf;
508  }
509 
510  /** Give Buffers access to the members of Buffers of different dimensionalities and types. */
511  template<typename T2, int D2> friend class Buffer;
512 
513  /** Determine if a Buffer<T, D> can be constructed from some other Buffer type.
514  * If a mismatch can be detected at compile time, fail with a static assert; otherwise
515  * return a boolean based on runtime typing. */
516  template<typename T2, int D2>
517  static bool can_convert_from(const Buffer<T2, D2> &other) {
518  static_assert((!std::is_const<T2>::value || std::is_const<T>::value),
519  "Can't convert from a Buffer<const T> to a Buffer<T>");
520  static_assert(std::is_same<typename std::remove_const<T>::type,
521  typename std::remove_const<T2>::type>::value ||
522  T_is_void || Buffer<T2, D2>::T_is_void,
523  "type mismatch constructing Buffer");
524  if (Buffer<T2, D2>::T_is_void && !T_is_void) {
525  return other.type() == static_halide_type();
526  }
527  return true;
528  }
529 
530  /** Fail an assertion at runtime or compile-time if a Buffer<T, D>
531  * cannot be constructed from some other Buffer type. */
532  template<typename T2, int D2>
533  static void assert_can_convert_from(const Buffer<T2, D2> &other) {
534  assert(can_convert_from(other));
535  }
536 
537  /** Copy constructor. Does not copy underlying data. */
538  Buffer(const Buffer<T, D> &other) : buf(other.buf),
539  alloc(other.alloc) {
540  other.incref();
541  dev_ref_count = other.dev_ref_count;
542  copy_shape_from(other.buf);
543  }
544 
545  /** Construct a Buffer from a Buffer of different dimensionality
546  * and type. Asserts that the type matches (at runtime, if one of
547  * the types is void). Note that this constructor is
548  * implicit. This, for example, lets you pass things like
549  * Buffer<T> or Buffer<const void> to functions expecting
550  * Buffer<const T>. */
551  template<typename T2, int D2>
552  Buffer(const Buffer<T2, D2> &other) : buf(other.buf),
553  alloc(other.alloc) {
554  assert_can_convert_from(other);
555  other.incref();
556  dev_ref_count = other.dev_ref_count;
557  copy_shape_from(other.buf);
558  }
559 
560  /** Move constructor */
561  Buffer(Buffer<T, D> &&other) : buf(other.buf),
562  alloc(other.alloc),
563  dev_ref_count(other.dev_ref_count) {
564  other.dev_ref_count = nullptr;
565  other.alloc = nullptr;
566  other.buf.device = 0;
567  other.buf.device_interface = nullptr;
568  move_shape_from(std::forward<Buffer<T, D>>(other));
569  }
570 
571  /** Move-construct a Buffer from a Buffer of different
572  * dimensionality and type. Asserts that the types match (at
573  * runtime if one of the types is void). */
574  template<typename T2, int D2>
575  Buffer(Buffer<T2, D2> &&other) : buf(other.buf),
576  alloc(other.alloc),
577  dev_ref_count(other.dev_ref_count) {
578  other.dev_ref_count = nullptr;
579  other.alloc = nullptr;
580  other.buf.device = 0;
581  other.buf.device_interface = nullptr;
582  move_shape_from(std::forward<Buffer<T2, D2>>(other));
583  }
584 
585  /** Assign from another Buffer of possibly-different
586  * dimensionality and type. Asserts that the types match (at
587  * runtime if one of the types is void). */
588  template<typename T2, int D2>
589  Buffer<T, D> &operator=(const Buffer<T2, D2> &other) {
590  if ((const void *)this == (const void *)&other) {
591  return *this;
592  }
593  assert_can_convert_from(other);
594  other.incref();
595  decref();
596  dev_ref_count = other.dev_ref_count;
597  alloc = other.alloc;
598  free_shape_storage();
599  buf = other.buf;
600  copy_shape_from(other.buf);
601  return *this;
602  }
603 
604  /** Standard assignment operator */
605  Buffer<T, D> &operator=(const Buffer<T, D> &other) {
606  if (this == &other) {
607  return *this;
608  }
609  other.incref();
610  decref();
611  dev_ref_count = other.dev_ref_count;
612  alloc = other.alloc;
613  free_shape_storage();
614  buf = other.buf;
615  copy_shape_from(other.buf);
616  return *this;
617  }
618 
619  /** Move from another Buffer of possibly-different
620  * dimensionality and type. Asserts that the types match (at
621  * runtime if one of the types is void). */
622  template<typename T2, int D2>
623  Buffer<T, D> &operator=(Buffer<T2, D2> &&other) {
624  assert_can_convert_from(other);
625  decref();
626  alloc = other.alloc;
627  other.alloc = nullptr;
628  dev_ref_count = other.dev_ref_count;
629  other.dev_ref_count = nullptr;
630  free_shape_storage();
631  buf = other.buf;
632  other.buf.device = 0;
633  other.buf.device_interface = nullptr;
634  move_shape_from(std::forward<Buffer<T2, D2>>(other));
635  return *this;
636  }
637 
638  /** Standard move-assignment operator */
639  Buffer<T, D> &operator=(Buffer<T, D> &&other) {
640  decref();
641  alloc = other.alloc;
642  other.alloc = nullptr;
643  dev_ref_count = other.dev_ref_count;
644  other.dev_ref_count = nullptr;
645  free_shape_storage();
646  buf = other.buf;
647  other.buf.device = 0;
648  other.buf.device_interface = nullptr;
649  move_shape_from(std::forward<Buffer<T, D>>(other));
650  return *this;
651  }
652 
653  /** Check that the product of the extents fits in memory. */
654  void check_overflow() {
655  size_t size = type().bytes();
656  for (int i = 0; i < dimensions(); i++) {
657  size *= dim(i).extent();
658  }
659  // We allow 2^31 or 2^63 bytes, so drop the top bit.
660  size = (size << 1) >> 1;
661  for (int i = 0; i < dimensions(); i++) {
662  size /= dim(i).extent();
663  }
664  assert(size == (size_t)type().bytes() && "Error: Overflow computing total size of buffer.");
665  }
666 
667  /** Allocate memory for this Buffer. Drops the reference to any
668  * owned memory. */
669  void allocate(void *(*allocate_fn)(size_t) = nullptr,
670  void (*deallocate_fn)(void *) = nullptr) {
671  if (!allocate_fn) {
672  allocate_fn = malloc;
673  }
674  if (!deallocate_fn) {
675  deallocate_fn = free;
676  }
677 
678  // Drop any existing allocation
679  deallocate();
680 
681  // Conservatively align images to 128 bytes. This is enough
682  // alignment for all the platforms we might use.
683  size_t size = size_in_bytes();
684  const size_t alignment = 128;
685  size = (size + alignment - 1) & ~(alignment - 1);
686  alloc = (AllocationHeader *)allocate_fn(size + sizeof(AllocationHeader) + alignment - 1);
687  alloc->deallocate_fn = deallocate_fn;
688  alloc->ref_count = 1;
689  uint8_t *unaligned_ptr = ((uint8_t *)alloc) + sizeof(AllocationHeader);
690  buf.host = (uint8_t *)((uintptr_t)(unaligned_ptr + alignment - 1) & ~(alignment - 1));
691  }
692 
693  /** Drop reference to any owned host or device memory, possibly
694  * freeing it, if this buffer held the last reference to
695  * it. Retains the shape of the buffer. Does nothing if this
696  * buffer did not allocate its own memory. */
697  void deallocate() {
698  decref();
699  }
700 
701  /** Drop reference to any owned device memory, possibly freeing it
702  * if this buffer held the last reference to it. Asserts that
703  * device_dirty is false. */
704  void device_deallocate() {
705  decref_dev();
706  }
707 
708  /** Allocate a new image of the given size with a runtime
709  * type. Only used when you do know what size you want but you
710  * don't know statically what type the elements are. Pass zeroes
711  * to make a buffer suitable for bounds query calls. */
712  template<typename ...Args,
713  typename = typename std::enable_if<AllInts<Args...>::value>::type>
714  Buffer(halide_type_t t, int first, Args... rest) {
715  if (!T_is_void) {
716  assert(static_halide_type() == t);
717  }
718  buf.type = t;
719  buf.dimensions = 1 + (int)(sizeof...(rest));
720  make_shape_storage();
721  initialize_shape(0, first, rest...);
722  if (!any_zero(first, rest...)) {
723  check_overflow();
724  allocate();
725  }
726  }
727 
728 
729  /** Allocate a new image of the given size. Pass zeroes to make a
730  * buffer suitable for bounds query calls. */
731  // @{
732 
733  // The overload with one argument is 'explicit', so that
734  // (say) int is not implicitly convertible to Buffer<int>
735  explicit Buffer(int first) {
736  static_assert(!T_is_void,
737  "To construct an Buffer<void>, pass a halide_type_t as the first argument to the constructor");
738  buf.type = static_halide_type();
739  buf.dimensions = 1;
740  make_shape_storage();
741  initialize_shape(0, first);
742  if (first != 0) {
743  check_overflow();
744  allocate();
745  }
746  }
747 
748  template<typename ...Args,
749  typename = typename std::enable_if<AllInts<Args...>::value>::type>
750  Buffer(int first, int second, Args... rest) {
751  static_assert(!T_is_void,
752  "To construct an Buffer<void>, pass a halide_type_t as the first argument to the constructor");
753  buf.type = static_halide_type();
754  buf.dimensions = 2 + (int)(sizeof...(rest));
755  make_shape_storage();
756  initialize_shape(0, first, second, rest...);
757  if (!any_zero(first, second, rest...)) {
758  check_overflow();
759  allocate();
760  }
761  }
762  // @}
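 // Example (illustrative, not part of this header): allocating a buffer of
 // statically-known element type, and one whose type is chosen at runtime:
 //
 //     Halide::Runtime::Buffer<float> planar(800, 600, 3);              // owns its allocation
 //     Halide::Runtime::Buffer<> dynamic(halide_type_of<uint8_t>(), 800, 600);
 //
 // Passing zeroes instead produces an unallocated buffer suitable for
 // bounds-query calls, as the comments above describe.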
763 
764  /** Allocate a new image of unknown type using a vector of ints as the size. */
765  Buffer(halide_type_t t, const std::vector<int> &sizes) {
766  if (!T_is_void) {
767  assert(static_halide_type() == t);
768  }
769  buf.type = t;
770  buf.dimensions = (int)sizes.size();
771  make_shape_storage();
772  initialize_shape(sizes);
773  if (!any_zero(sizes)) {
774  check_overflow();
775  allocate();
776  }
777  }
778 
779  /** Allocate a new image of known type using a vector of ints as the size. */
780  Buffer(const std::vector<int> &sizes) {
781  buf.type = static_halide_type();
782  buf.dimensions = (int)sizes.size();
783  make_shape_storage();
784  initialize_shape(sizes);
785  if (!any_zero(sizes)) {
786  check_overflow();
787  allocate();
788  }
789  }
790 
791  /** Make a Buffer that refers to a statically sized array. Does not
792  * take ownership of the data, and does not set the host_dirty flag. */
793  template<typename Array, size_t N>
794  explicit Buffer(Array (&vals)[N]) {
795  buf.dimensions = dimensionality_of_array(vals);
796  buf.type = scalar_type_of_array(vals);
797  buf.host = (uint8_t *)vals;
798  make_shape_storage();
799  initialize_shape_from_array_shape(buf.dimensions - 1, vals);
800  }
801 
802  /** Initialize a Buffer of runtime type from a pointer and some
803  * sizes. Assumes dense row-major packing and a min coordinate of
804  * zero. Does not take ownership of the data and does not set the
805  * host_dirty flag. */
806  template<typename ...Args,
807  typename = typename std::enable_if<AllInts<Args...>::value>::type>
808  explicit Buffer(halide_type_t t, add_const_if_T_is_const<void> *data, int first, Args&&... rest) {
809  if (!T_is_void) {
810  assert(static_halide_type() == t);
811  }
812  buf.type = t;
813  buf.dimensions = 1 + (int)(sizeof...(rest));
814  buf.host = (uint8_t *)data;
815  make_shape_storage();
816  initialize_shape(0, first, int(rest)...);
817  }
818 
819  /** Initialize a Buffer from a pointer and some sizes. Assumes
820  * dense row-major packing and a min coordinate of zero. Does not
821  * take ownership of the data and does not set the host_dirty flag. */
822  template<typename ...Args,
823  typename = typename std::enable_if<AllInts<Args...>::value>::type>
824  explicit Buffer(T *data, int first, Args&&... rest) {
825  buf.type = static_halide_type();
826  buf.dimensions = 1 + (int)(sizeof...(rest));
827  buf.host = (uint8_t *)data;
828  make_shape_storage();
829  initialize_shape(0, first, int(rest)...);
830  }
831 
832  /** Initialize a Buffer from a pointer and a vector of
833  * sizes. Assumes dense row-major packing and a min coordinate of
834  * zero. Does not take ownership of the data and does not set the
835  * host_dirty flag. */
836  explicit Buffer(T *data, const std::vector<int> &sizes) {
837  buf.type = static_halide_type();
838  buf.dimensions = (int)sizes.size();
839  buf.host = (uint8_t *)data;
840  make_shape_storage();
841  initialize_shape(sizes);
842  }
843 
844  /** Initialize a Buffer of runtime type from a pointer and a
845  * vector of sizes. Assumes dense row-major packing and a min
846  * coordinate of zero. Does not take ownership of the data and
847  * does not set the host_dirty flag. */
848  explicit Buffer(halide_type_t t, add_const_if_T_is_const<void> *data, const std::vector<int> &sizes) {
849  if (!T_is_void) {
850  assert(static_halide_type() == t);
851  }
852  buf.type = t;
853  buf.dimensions = (int)sizes.size();
854  buf.host = (uint8_t *)data;
855  make_shape_storage();
856  initialize_shape(sizes);
857  }
858 
859  /** Initialize a Buffer from a pointer to the min coordinate and
860  * an array describing the shape. Does not take ownership of the
861  * data, and does not set the host_dirty flag. */
862  explicit Buffer(halide_type_t t, add_const_if_T_is_const<void> *data, int d, const halide_dimension_t *shape) {
863  if (!T_is_void) {
864  assert(static_halide_type() == t);
865  }
866  buf.type = t;
867  buf.dimensions = d;
868  buf.host = (uint8_t *)data;
869  make_shape_storage();
870  for (int i = 0; i < d; i++) {
871  buf.dim[i] = shape[i];
872  }
873  }
874 
875  /** Initialize a Buffer from a pointer to the min coordinate and
876  * an array describing the shape. Does not take ownership of the
877  * data and does not set the host_dirty flag. */
878  explicit Buffer(T *data, int d, const halide_dimension_t *shape) {
879  buf.type = halide_type_of<typename std::remove_cv<T>::type>();
880  buf.dimensions = d;
881  buf.host = (uint8_t *)data;
882  make_shape_storage();
883  for (int i = 0; i < d; i++) {
884  buf.dim[i] = shape[i];
885  }
886  }
887 
888  /** Destructor. Will release any underlying owned allocation if
889  * this is the last reference to it. Will assert fail if there are
890  * weak references to this Buffer outstanding. */
891  ~Buffer() {
892  free_shape_storage();
893  decref();
894  }
895 
896  /** Get a pointer to the raw halide_buffer_t this wraps. */
897  // @{
898  halide_buffer_t *raw_buffer() {
899  return &buf;
900  }
901 
902  const halide_buffer_t *raw_buffer() const {
903  return &buf;
904  }
905  // @}
906 
907  /** Provide a cast operator to halide_buffer_t *, so that
908  * instances can be passed directly to Halide filters. */
909  operator halide_buffer_t *() {
910  return &buf;
911  }
912 
913  /** Return a typed reference to this Buffer. Useful for converting
914  * a reference to a Buffer<void> to a reference to, for example, a
915  * Buffer<const uint8_t>. Does a runtime assert if the source
916  * buffer type is void. */
917  template<typename T2, int D2 = D,
918  typename = typename std::enable_if<(D2 <= D)>::type>
919  Buffer<T2, D2> &as() & {
920  Buffer<T2, D2>::assert_can_convert_from(*this);
921  return *((Buffer<T2, D2> *)this);
922  }
923 
924  /** Return a const typed reference to this Buffer. Useful for
925  * converting a const reference to one Buffer type to a const
926  * reference to another Buffer type. Does a runtime assert if the
927  * source buffer type is void. */
928  template<typename T2, int D2 = D,
929  typename = typename std::enable_if<(D2 <= D)>::type>
930  const Buffer<T2, D2> &as() const & {
931  Buffer<T2, D2>::assert_can_convert_from(*this);
932  return *((const Buffer<T2, D2> *)this);
933  }
934 
935  /** Returns this rval Buffer with a different type attached. Does
936  * a dynamic type check if the source type is void. */
937  template<typename T2, int D2 = D>
938  Buffer<T2, D2> as() && {
939  Buffer<T2, D2>::assert_can_convert_from(*this);
940  return *((Buffer<T2, D2> *)this);
941  }
942 
943  /** Conventional names for the first three dimensions. */
944  // @{
945  int width() const {
946  return (dimensions() > 0) ? dim(0).extent() : 1;
947  }
948  int height() const {
949  return (dimensions() > 1) ? dim(1).extent() : 1;
950  }
951  int channels() const {
952  return (dimensions() > 2) ? dim(2).extent() : 1;
953  }
954  // @}
955 
956  /** Conventional names for the min and max value of each dimension */
957  // @{
958  int left() const {
959  return dim(0).min();
960  }
961 
962  int right() const {
963  return dim(0).max();
964  }
965 
966  int top() const {
967  return dim(1).min();
968  }
969 
970  int bottom() const {
971  return dim(1).max();
972  }
973  // @}
974 
975  /** Make a new image which is a deep copy of this image. Use crop
976  * or slice followed by copy to make a copy of only a portion of
977  * the image. The new image uses the same memory layout as the
978  * original, with holes compacted away. */
979  Buffer<T, D> copy(void *(*allocate_fn)(size_t) = nullptr,
980  void (*deallocate_fn)(void *) = nullptr) const {
981  Buffer<T, D> dst = make_with_shape_of(*this, allocate_fn, deallocate_fn);
982  dst.copy_from(*this);
983  return dst;
984  }
985 
986  /** Fill a Buffer with the values at the same coordinates in
987  * another Buffer. Restricts itself to coordinates contained
988  * within the intersection of the two buffers. If the two Buffers
989  * are not in the same coordinate system, you will need to
990  * translate the argument Buffer first. E.g. if you're blitting a
991  * sprite onto a framebuffer, you'll want to translate the sprite
992  * to the correct location first like so: \code
993  * framebuffer.copy_from(sprite.translated({x, y})); \endcode
994  */
995  template<typename T2, int D2>
996  void copy_from(const Buffer<T2, D2> &other) {
997  assert(!device_dirty() && "Cannot call Halide::Runtime::Buffer::copy_from on a device dirty destination.");
998  assert(!other.device_dirty() && "Cannot call Halide::Runtime::Buffer::copy_from on a device dirty source.");
999 
1000  Buffer<const T, D> src(other);
1001  Buffer<T, D> dst(*this);
1002 
1003  assert(src.dimensions() == dst.dimensions());
1004 
1005  // Trim the copy to the region in common
1006  for (int i = 0; i < dimensions(); i++) {
1007  int min_coord = std::max(dst.dim(i).min(), src.dim(i).min());
1008  int max_coord = std::min(dst.dim(i).max(), src.dim(i).max());
1009  if (max_coord < min_coord) {
1010  // The buffers do not overlap.
1011  return;
1012  }
1013  dst.crop(i, min_coord, max_coord - min_coord + 1);
1014  src.crop(i, min_coord, max_coord - min_coord + 1);
1015  }
1016 
1017  // If T is void, we need to do runtime dispatch to an
1018  // appropriately-typed lambda. We're copying, so we only care
1019  // about the element size.
1020  if (type().bytes() == 1) {
1021  using MemType = uint8_t;
1022  auto &typed_dst = (Buffer<MemType, D> &)dst;
1023  auto &typed_src = (Buffer<const MemType, D> &)src;
1024  typed_dst.for_each_value([&](MemType &dst, MemType src) {dst = src;}, typed_src);
1025  } else if (type().bytes() == 2) {
1026  using MemType = uint16_t;
1027  auto &typed_dst = (Buffer<MemType, D> &)dst;
1028  auto &typed_src = (Buffer<const MemType, D> &)src;
1029  typed_dst.for_each_value([&](MemType &dst, MemType src) {dst = src;}, typed_src);
1030  } else if (type().bytes() == 4) {
1031  using MemType = uint32_t;
1032  auto &typed_dst = (Buffer<MemType, D> &)dst;
1033  auto &typed_src = (Buffer<const MemType, D> &)src;
1034  typed_dst.for_each_value([&](MemType &dst, MemType src) {dst = src;}, typed_src);
1035  } else if (type().bytes() == 8) {
1036  using MemType = uint64_t;
1037  auto &typed_dst = (Buffer<MemType, D> &)dst;
1038  auto &typed_src = (Buffer<const MemType, D> &)src;
1039  typed_dst.for_each_value([&](MemType &dst, MemType src) {dst = src;}, typed_src);
1040  } else {
1041  assert(false && "type().bytes() must be 1, 2, 4, or 8");
1042  }
1043  set_host_dirty();
1044  }
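 // Example (illustrative, not part of this header): blitting one buffer into
 // another with copy_from, translating the source so it lands at the desired
 // position in the destination's coordinate system:
 //
 //     Halide::Runtime::Buffer<uint8_t> framebuffer(1024, 768);
 //     Halide::Runtime::Buffer<uint8_t> sprite(32, 32);
 //     framebuffer.copy_from(sprite.translated({100, 200}));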
1045 
1046  /** Make an image that refers to a sub-range of this image along
1047  * the given dimension. Does not assert the crop region is within
1048  * the existing bounds. The cropped image drops any device
1049  * handle. */
1050  Buffer<T, D> cropped(int d, int min, int extent) const {
1051  // Make a fresh copy of the underlying buffer (but not a fresh
1052  // copy of the allocation, if there is one).
1053  Buffer<T, D> im = *this;
1054  im.crop(d, min, extent);
1055  return im;
1056  }
1057 
1058  /** Crop an image in-place along the given dimension. */
1059  void crop(int d, int min, int extent) {
1060  // assert(dim(d).min() <= min);
1061  // assert(dim(d).max() >= min + extent - 1);
1062  int shift = min - dim(d).min();
1063  if (shift) {
1064  device_deallocate();
1065  }
1066  if (buf.host != nullptr) {
1067  buf.host += shift * dim(d).stride() * type().bytes();
1068  }
1069  buf.dim[d].min = min;
1070  buf.dim[d].extent = extent;
1071  }
1072 
1073  /** Make an image that refers to a sub-rectangle of this image along
1074  * the first N dimensions. Does not assert the crop region is within
1075  * the existing bounds. The cropped image drops any device handle. */
1076  Buffer<T, D> cropped(const std::vector<std::pair<int, int>> &rect) const {
1077  // Make a fresh copy of the underlying buffer (but not a fresh
1078  // copy of the allocation, if there is one).
1079  Buffer<T, D> im = *this;
1080  im.crop(rect);
1081  return im;
1082  }
1083 
1084  /** Crop an image in-place along the first N dimensions. */
1085  void crop(const std::vector<std::pair<int, int>> &rect) {
1086  assert(rect.size() <= std::numeric_limits<int>::max());
1087  int limit = (int)rect.size();
1088  assert(limit <= dimensions());
1089  for (int i = 0; i < limit; i++) {
1090  crop(i, rect[i].first, rect[i].second);
1091  }
1092  }
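 // Example (illustrative, not part of this header): cropping a region of
 // interest. The cropped view shares the host allocation with the original:
 //
 //     Halide::Runtime::Buffer<float> im(640, 480);
 //     Halide::Runtime::Buffer<float> roi = im.cropped({{100, 64}, {200, 32}});
 //     // roi.dim(0) now spans [100, 163] and roi.dim(1) spans [200, 231].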
1093 
1094  /** Make an image which refers to the same data using
1095  * translated coordinates in the given dimension. Positive values
1096  * move the image data to the right or down relative to the
1097  * coordinate system. Drops any device handle. */
1098  Buffer<T, D> translated(int d, int dx) const {
1099  Buffer<T, D> im = *this;
1100  im.translate(d, dx);
1101  return im;
1102  }
1103 
1104  /** Translate an image in-place along one dimension */
1105  void translate(int d, int delta) {
1106  device_deallocate();
1107  buf.dim[d].min += delta;
1108  }
1109 
1110  /** Make an image which refers to the same data translated along
1111  * the first N dimensions. */
1112  Buffer<T, D> translated(const std::vector<int> &delta) {
1113  Buffer<T, D> im = *this;
1114  im.translate(delta);
1115  return im;
1116  }
1117 
1118  /** Translate an image along the first N dimensions */
1119  void translate(const std::vector<int> &delta) {
1120  device_deallocate();
1121  assert(delta.size() <= std::numeric_limits<int>::max());
1122  int limit = (int)delta.size();
1123  assert(limit <= dimensions());
1124  for (int i = 0; i < limit; i++) {
1125  translate(i, delta[i]);
1126  }
1127  }
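 // Example (illustrative, not part of this header): translation changes only
 // the coordinate system; no data moves. For a 2D buffer "im" whose mins are
 // currently zero:
 //
 //     im.translate({10, 20});
 //     // &im(10, 20) now refers to the element formerly addressed as &im(0, 0).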
1128 
1129  /** Set the min coordinate of an image in the first N dimensions */
1130  template<typename ...Args>
1131  void set_min(Args... args) {
1132  assert(sizeof...(args) <= (size_t)dimensions());
1133  device_deallocate();
1134  const int x[] = {args...};
1135  for (size_t i = 0; i < sizeof...(args); i++) {
1136  buf.dim[i].min = x[i];
1137  }
1138  }
1139 
1140  /** Test if a given coordinate is within the bounds of an image */
1141  template<typename ...Args>
1142  bool contains(Args... args) {
1143  assert(sizeof...(args) <= (size_t)dimensions());
1144  const int x[] = {args...};
1145  for (size_t i = 0; i < sizeof...(args); i++) {
1146  if (x[i] < dim(i).min() || x[i] > dim(i).max()) {
1147  return false;
1148  }
1149  }
1150  return true;
1151  }
1152 
1153  /** Make an image which refers to the same data using a different
1154  * ordering of the dimensions. */
1155  Buffer<T, D> transposed(int d1, int d2) const {
1156  Buffer<T, D> im = *this;
1157  im.transpose(d1, d2);
1158  return im;
1159  }
1160 
1161  /** Transpose an image in-place */
1162  void transpose(int d1, int d2) {
1163  std::swap(buf.dim[d1], buf.dim[d2]);
1164  }
1165 
1166  /** Make a lower-dimensional image that refers to one slice of this
1167  * image. */
1168  Buffer<T, D> sliced(int d, int pos) const {
1169  Buffer<T, D> im = *this;
1170  im.slice(d, pos);
1171  return im;
1172  }
1173 
1174  /** Slice an image in-place */
1175  void slice(int d, int pos) {
1176  // assert(pos >= dim(d).min() && pos <= dim(d).max());
1177  device_deallocate();
1178  buf.dimensions--;
1179  int shift = pos - dim(d).min();
1180  assert(buf.device == 0 || shift == 0);
1181  if (buf.host != nullptr) {
1182  buf.host += shift * dim(d).stride() * type().bytes();
1183  }
1184  for (int i = d; i < dimensions(); i++) {
1185  buf.dim[i] = buf.dim[i+1];
1186  }
1187  buf.dim[buf.dimensions] = {0, 0, 0};
1188  }
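 // Example (illustrative, not part of this header): slicing out the green
 // channel of an (x, y, c) image as a 2D view of the same storage:
 //
 //     Halide::Runtime::Buffer<uint8_t> rgb(640, 480, 3);
 //     Halide::Runtime::Buffer<uint8_t> green = rgb.sliced(2, 1);
 //     // green(x, y) aliases rgb(x, y, 1).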
1189 
1190  /** Make a new image that views this image as a single slice in a
1191  * higher-dimensional space. The new dimension has extent one and
1192  * the given min. This operation is the opposite of slice. As an
1193  * example, the following condition is true:
1194  *
1195  \code
1196  im2 = im.embedded(1, 17);
1197  &im(x, y, c) == &im2(x, 17, y, c);
1198  \endcode
1199  */
1200  Buffer<T, D> embedded(int d, int pos) const {
1201  assert(d >= 0 && d <= dimensions());
1202  Buffer<T, D> im(*this);
1203  im.embed(d, pos);
1204  return im;
1205  }
1206 
1207  /** Embed an image in-place, increasing the
1208  * dimensionality. */
1209  void embed(int d, int pos) {
1210  assert(d >= 0 && d <= dimensions());
1211  add_dimension();
1212  translate(dimensions() - 1, pos);
1213  for (int i = dimensions() - 1; i > d; i--) {
1214  transpose(i, i-1);
1215  }
1216  }
1217 
1218  /** Add a new dimension with a min of zero and an extent of
1219  * one. The stride is the extent of the outermost dimension times
1220  * its stride. The new dimension is the last dimension. This is a
1221  * special case of embed. */
1222  void add_dimension() {
1223  const int dims = buf.dimensions;
1224  buf.dimensions++;
1225  if (buf.dim != shape) {
1226  // We're already on the heap. Reallocate.
1227  halide_dimension_t *new_shape = new halide_dimension_t[buf.dimensions];
1228  for (int i = 0; i < dims; i++) {
1229  new_shape[i] = buf.dim[i];
1230  }
1231  delete[] buf.dim;
1232  buf.dim = new_shape;
1233  } else if (dims == D) {
1234  // Transition from the in-class storage to the heap
1235  make_shape_storage();
1236  for (int i = 0; i < dims; i++) {
1237  buf.dim[i] = shape[i];
1238  }
1239  } else {
1240  // We still fit in the class
1241  }
1242  buf.dim[dims] = {0, 1, 0};
1243  if (dims == 0) {
1244  buf.dim[dims].stride = 1;
1245  } else {
1246  buf.dim[dims].stride = buf.dim[dims-1].extent * buf.dim[dims-1].stride;
1247  }
1248  }
1249 
1250  /** Add a new dimension with a min of zero, an extent of one, and
1251  * the specified stride. The new dimension is the last
1252  * dimension. This is a special case of embed. */
1253  void add_dimension_with_stride(int s) {
1254  add_dimension();
1255  buf.dim[buf.dimensions-1].stride = s;
1256  }
1257 
1258  /** Methods for managing any GPU allocation. */
1259  // @{
1260  void set_host_dirty(bool v = true) {
1261  assert((!v || !device_dirty()) && "Cannot set host dirty when device is already dirty.");
1262  buf.set_host_dirty(v);
1263  }
1264 
1265  bool device_dirty() const {
1266  return buf.device_dirty();
1267  }
1268 
1269  bool host_dirty() const {
1270  return buf.host_dirty();
1271  }
1272 
1273  void set_device_dirty(bool v = true) {
1274  assert((!v || !host_dirty()) && "Cannot set device dirty when host is already dirty.");
1275  buf.set_device_dirty(v);
1276  }
1277 
1278  int copy_to_host(void *ctx = nullptr) {
1279  if (device_dirty()) {
1280  return buf.device_interface->copy_to_host(ctx, &buf);
1281  }
1282  return 0;
1283  }
1284 
1285  int copy_to_device(const struct halide_device_interface_t *device_interface, void *ctx = nullptr) {
1286  if (host_dirty()) {
1287  return device_interface->copy_to_device(ctx, &buf, device_interface);
1288  }
1289  return 0;
1290  }
1291 
1292  int device_malloc(const struct halide_device_interface_t *device_interface, void *ctx = nullptr) {
1293  return device_interface->device_malloc(ctx, &buf, device_interface);
1294  }
1295 
1296  int device_free(void *ctx = nullptr) {
1297  if (dev_ref_count) {
1298  assert(dev_ref_count->ownership == BufferDeviceOwnership::Allocated &&
1299  "Can't call device_free on an unmanaged or wrapped native device handle. "
1300  "Free the source allocation or call device_detach_native instead.");
1301  // Multiple people may be holding onto this dev field
1302  assert(dev_ref_count->count == 1 &&
1303  "Multiple Halide::Runtime::Buffer objects share this device "
1304  "allocation. Freeing it would create dangling references. "
1305  "Don't call device_free on Halide buffers that you have copied or "
1306  "passed by value.");
1307  }
1308  int ret = 0;
1309  if (buf.device_interface) {
1310  ret = buf.device_interface->device_free(ctx, &buf);
1311  }
1312  if (dev_ref_count) {
1313  delete dev_ref_count;
1314  dev_ref_count = nullptr;
1315  }
1316  return ret;
1317  }
1318 
1319  int device_wrap_native(const struct halide_device_interface_t *device_interface,
1320  uint64_t handle, void *ctx = nullptr) {
1321  assert(device_interface);
1322  dev_ref_count = new DeviceRefCount;
1323  dev_ref_count->ownership = BufferDeviceOwnership::WrappedNative;
1324  return device_interface->wrap_native(ctx, &buf, handle, device_interface);
1325  }
1326 
1327  int device_detach_native(void *ctx = nullptr) {
1328  assert(dev_ref_count &&
1329  dev_ref_count->ownership == BufferDeviceOwnership::WrappedNative &&
1330  "Only call device_detach_native on buffers wrapping a native "
1331  "device handle via device_wrap_native. This buffer was allocated "
1332  "using device_malloc, or is unmanaged. "
1333  "Call device_free or free the original allocation instead.");
1334  // Multiple people may be holding onto this dev field
1335  assert(dev_ref_count->count == 1 &&
1336  "Multiple Halide::Runtime::Buffer objects share this device "
1337  "allocation. Freeing it could create dangling references. "
1338  "Don't call device_detach_native on Halide buffers that you "
1339  "have copied or passed by value.");
1340  int ret = 0;
1341  if (buf.device_interface) {
1342  ret = buf.device_interface->detach_native(ctx, &buf);
1343  }
1344  delete dev_ref_count;
1345  dev_ref_count = nullptr;
1346  return ret;
1347  }
1348 
1349  int device_and_host_malloc(const struct halide_device_interface_t *device_interface, void *ctx = nullptr) {
1350  return device_interface->device_and_host_malloc(ctx, &buf, device_interface);
1351  }
1352 
1353  int device_and_host_free(const struct halide_device_interface_t *device_interface, void *ctx = nullptr) {
1354  if (dev_ref_count) {
1355  assert(dev_ref_count->ownership == BufferDeviceOwnership::AllocatedDeviceAndHost &&
1356  "Can't call device_and_host_free on a device handle not allocated with device_and_host_malloc. "
1357  "Free the source allocation or call device_detach_native instead.");
1358  // Multiple people may be holding onto this dev field
1359  assert(dev_ref_count->count == 1 &&
1360  "Multiple Halide::Runtime::Buffer objects share this device "
1361  "allocation. Freeing it would create dangling references. "
1362  "Don't call device_and_host_free on Halide buffers that you have copied or "
1363  "passed by value.");
1364  }
1365  int ret = 0;
1366  if (buf.device_interface) {
1367  ret = buf.device_interface->device_and_host_free(ctx, &buf);
1368  }
1369  if (dev_ref_count) {
1370  delete dev_ref_count;
1371  dev_ref_count = nullptr;
1372  }
1373  return ret;
1374  }
1375 
1376  int device_sync(void *ctx = nullptr) {
1377  if (buf.device_interface) {
1378  return buf.device_interface->device_sync(ctx, &buf);
1379  } else {
1380  return 0;
1381  }
1382  }
1383 
1384  bool has_device_allocation() const {
1385  return buf.device != 0;
1386  }
1387 
1388  /** Return the method by which the device field is managed. */
1389  BufferDeviceOwnership device_ownership() const {
1390  if (dev_ref_count == nullptr) {
1391  return BufferDeviceOwnership::Allocated;
1392  }
1393  return dev_ref_count->ownership;
1394  }
1395  // @}
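 // Example (illustrative sketch, not part of this header): a typical
 // host/device round trip around an ahead-of-time-compiled pipeline. The name
 // "my_pipeline" and the use of a GPU schedule are assumptions for the example:
 //
 //     Halide::Runtime::Buffer<float> in(1024), out(1024);
 //     in.fill(1.0f);            // writes on the host set host_dirty
 //     my_pipeline(in, out);     // the runtime copies dirty host data to the device
 //     out.copy_to_host();       // copy device results back before reading out() on the CPU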
1396 
1397  /** If you use the (x, y, c) indexing convention, then Halide
1398  * Buffers are stored planar by default. This function constructs
1399  * an interleaved RGB or RGBA image that can still be indexed
1400  * using (x, y, c). Passing it to a generator requires that the
1401  * generator has been compiled with support for interleaved (also
1402  * known as packed or chunky) memory layouts. */
1403  static Buffer<void, D> make_interleaved(halide_type_t t, int width, int height, int channels) {
1404  Buffer<void, D> im(t, channels, width, height);
1405  im.transpose(0, 1);
1406  im.transpose(1, 2);
1407  return im;
1408  }
1409 
1410  /** If you use the (x, y, c) indexing convention, then Halide
1411  * Buffers are stored planar by default. This function constructs
1412  * an interleaved RGB or RGBA image that can still be indexed
1413  * using (x, y, c). Passing it to a generator requires that the
1414  * generator has been compiled with support for interleaved (also
1415  * known as packed or chunky) memory layouts. */
1416  static Buffer<T, D> make_interleaved(int width, int height, int channels) {
1417  Buffer<T, D> im(channels, width, height);
1418  im.transpose(0, 1);
1419  im.transpose(1, 2);
1420  return im;
1421  }
1422 
1423  /** Wrap an existing interleaved image. */
1424  static Buffer<add_const_if_T_is_const<void>, D>
1425  make_interleaved(halide_type_t t, T *data, int width, int height, int channels) {
1426  Buffer<add_const_if_T_is_const<void>, D> im(t, data, channels, width, height);
1427  im.transpose(0, 1);
1428  im.transpose(1, 2);
1429  return im;
1430  }
1431 
1432  /** Wrap an existing interleaved image. */
1433  static Buffer<T, D> make_interleaved(T *data, int width, int height, int channels) {
1434  Buffer<T, D> im(data, channels, width, height);
1435  im.transpose(0, 1);
1436  im.transpose(1, 2);
1437  return im;
1438  }
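 // Example (illustrative, not part of this header): an interleaved RGB image.
 // The channel dimension gets stride 1 and x gets a stride equal to the
 // number of channels, but indexing remains (x, y, c):
 //
 //     auto rgb = Halide::Runtime::Buffer<uint8_t>::make_interleaved(640, 480, 3);
 //     // rgb.dim(0).stride() == 3, rgb.dim(2).stride() == 1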
1439 
1440  /** Make a zero-dimensional Buffer */
1441  static Buffer<add_const_if_T_is_const<void>, D> make_scalar(halide_type_t t) {
1442  Buffer<add_const_if_T_is_const<void>, 1> buf(t, 1);
1443  buf.slice(0, 0);
1444  return buf;
1445  }
1446 
1447  /** Make a zero-dimensional Buffer */
1448  static Buffer<T, D> make_scalar() {
1449  Buffer<T, 1> buf(1);
1450  buf.slice(0, 0);
1451  return buf;
1452  }
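 // Example (illustrative, not part of this header): a zero-dimensional buffer
 // is convenient for scalar pipeline outputs:
 //
 //     auto result = Halide::Runtime::Buffer<float>::make_scalar();
 //     result() = 0.0f;    // accessed with no coordinates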
1453 
1454  /** Make a buffer with the same shape and memory nesting order as
1455  * another buffer. It may have a different type. */
1456  template<typename T2, int D2>
1457  static Buffer<T, D> make_with_shape_of(Buffer<T2, D2> src,
1458  void *(*allocate_fn)(size_t) = nullptr,
1459  void (*deallocate_fn)(void *) = nullptr) {
1460  // Reorder the dimensions of src to have strides in increasing order
1461  std::vector<int> swaps;
1462  for (int i = src.dimensions()-1; i > 0; i--) {
1463  for (int j = i; j > 0; j--) {
1464  if (src.dim(j-1).stride() > src.dim(j).stride()) {
1465  src.transpose(j-1, j);
1466  swaps.push_back(j);
1467  }
1468  }
1469  }
1470 
1471  // Rewrite the strides to be dense (this messes up src, which
1472  // is why we took it by value).
1473  halide_dimension_t *shape = src.buf.dim;
1474  for (int i = 0; i < src.dimensions(); i++) {
1475  if (i == 0) {
1476  shape[i].stride = 1;
1477  } else {
1478  shape[i].stride = shape[i-1].extent * shape[i-1].stride;
1479  }
1480  }
1481 
1482  // Undo the dimension reordering
1483  while (!swaps.empty()) {
1484  int j = swaps.back();
1485  std::swap(shape[j-1], shape[j]);
1486  swaps.pop_back();
1487  }
1488 
1489  Buffer<T, D> dst(nullptr, src.dimensions(), shape);
1490  dst.allocate(allocate_fn, deallocate_fn);
1491 
1492  return dst;
1493  }
1494 
1495 private:
1496 
1497  template<typename ...Args>
1499  ptrdiff_t offset_of(int d, int first, Args... rest) const {
1500  return offset_of(d+1, rest...) + this->buf.dim[d].stride * (first - this->buf.dim[d].min);
1501  }
1502 
1504  ptrdiff_t offset_of(int d) const {
1505  return 0;
1506  }
1507 
1508  template<typename ...Args>
1510  storage_T *address_of(Args... args) const {
1511  if (T_is_void) {
1512  return (storage_T *)(this->buf.host) + offset_of(0, args...) * type().bytes();
1513  } else {
1514  return (storage_T *)(this->buf.host) + offset_of(0, args...);
1515  }
1516  }
1517 
1519  ptrdiff_t offset_of(const int *pos) const {
1520  ptrdiff_t offset = 0;
1521  for (int i = this->dimensions() - 1; i >= 0; i--) {
1522  offset += this->buf.dim[i].stride * (pos[i] - this->buf.dim[i].min);
1523  }
1524  return offset;
1525  }
1526 
1528  storage_T *address_of(const int *pos) const {
1529  if (T_is_void) {
1530  return (storage_T *)this->buf.host + offset_of(pos) * type().bytes();
1531  } else {
1532  return (storage_T *)this->buf.host + offset_of(pos);
1533  }
1534  }
1535 
1536 public:
1537 
1538  /** Get a pointer to the address of the min coordinate. */
1539  // @{
1540  T *data() {
1541  return (T *)(this->buf.host);
1542  }
1543 
1544  const T *data() const {
1545  return (const T *)(this->buf.host);
1546  }
1547  // @}
1548 
1549  /** Access elements. Use im(...) to get a reference to an element,
1550  * and use &im(...) to get the address of an element. If you pass
1551  * fewer arguments than the buffer has dimensions, the rest are
1552  * treated as their min coordinate. The non-const versions set the
1553  * host_dirty flag to true.
1554  */
1555  //@{
1556  template<typename ...Args,
1557  typename = typename std::enable_if<AllInts<Args...>::value>::type>
1559  const not_void_T &operator()(int first, Args... rest) const {
1560  static_assert(!T_is_void,
1561  "Cannot use operator() on Buffer<void> types");
1562  assert(!device_dirty());
1563  return *((const not_void_T *)(address_of(first, rest...)));
1564  }
1565 
1567  const not_void_T &
1568  operator()() const {
1569  static_assert(!T_is_void,
1570  "Cannot use operator() on Buffer<void> types");
1571  assert(!device_dirty());
1572  return *((const not_void_T *)(data()));
1573  }
1574 
1576  const not_void_T &
1577  operator()(const int *pos) const {
1578  static_assert(!T_is_void,
1579  "Cannot use operator() on Buffer<void> types");
1580  assert(!device_dirty());
1581  return *((const not_void_T *)(address_of(pos)));
1582  }
1583 
1584  template<typename ...Args,
1585  typename = typename std::enable_if<AllInts<Args...>::value>::type>
1587  not_void_T &operator()(int first, Args... rest) {
1588  static_assert(!T_is_void,
1589  "Cannot use operator() on Buffer<void> types");
1590  set_host_dirty();
1591  return *((not_void_T *)(address_of(first, rest...)));
1592  }
1593 
1595  not_void_T &
1596  operator()() {
1597  static_assert(!T_is_void,
1598  "Cannot use operator() on Buffer<void> types");
1599  set_host_dirty();
1600  return *((not_void_T *)(data()));
1601  }
1602 
1604  not_void_T &
1605  operator()(const int *pos) {
1606  static_assert(!T_is_void,
1607  "Cannot use operator() on Buffer<void> types");
1608  set_host_dirty();
1609  return *((not_void_T *)(address_of(pos)));
1610  }
1611  // @}
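 // Example (illustrative, not part of this header): element access. Writes go
 // through the non-const overloads above, which set host_dirty:
 //
 //     Halide::Runtime::Buffer<float> im(640, 480);
 //     im(10, 20) = 3.0f;           // write; marks the host data dirty
 //     float v = im(10, 20);        // read
 //     float *p = &im(10, 20);      // address of an element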
1612 
1613  void fill(not_void_T val) {
1614  set_host_dirty();
1615  for_each_value([=](T &v) {v = val;});
1616  }
1617 
1618 private:
1619  /** Helper functions for for_each_value. */
1620  // @{
1621  template<int N>
1622  struct for_each_value_task_dim {
1623  int extent;
1624  int stride[N];
1625  };
1626 
1627  // Given an array of strides, and a bunch of pointers to pointers
1628  // (all of different types), advance the pointers using the
1629  // strides.
1630  template<typename Ptr, typename ...Ptrs>
1631  static void advance_ptrs(const int *stride, Ptr *ptr, Ptrs... ptrs) {
1632  (*ptr) += *stride;
1633  advance_ptrs(stride + 1, ptrs...);
1634  }
1635 
1636  static void advance_ptrs(const int *) {}
1637 
1638  // Same as the above, but just increments the pointers.
1639  template<typename Ptr, typename ...Ptrs>
1640  static void increment_ptrs(Ptr *ptr, Ptrs... ptrs) {
1641  (*ptr)++;
1642  increment_ptrs(ptrs...);
1643  }
1644 
1645  static void increment_ptrs() {}
1646 
1647  // Given a bunch of pointers to buffers of different types, read
1648  // out their strides in the d'th dimension, and assert that their
1649  // sizes match in that dimension.
1650  template<typename T2, int D2, typename ...Args>
1651  void extract_strides(int d, int *strides, const Buffer<T2, D2> *first, Args... rest) {
1652  assert(first->dimensions() == dimensions());
1653  assert(first->dim(d).min() == dim(d).min() &&
1654  first->dim(d).max() == dim(d).max());
1655  *strides++ = first->dim(d).stride();
1656  extract_strides(d, strides, rest...);
1657  }
1658 
1659  void extract_strides(int d, int *strides) {}
1660 
1661  // The template function that constructs the loop nest for for_each_value
1662  template<int d, bool innermost_strides_are_one, typename Fn, typename... Ptrs>
1663  static void for_each_value_helper(Fn &&f, const for_each_value_task_dim<sizeof...(Ptrs)> *t, Ptrs... ptrs) {
1664  if (d == -1) {
1665  f((*ptrs)...);
1666  } else {
1667  for (int i = t[d].extent; i != 0; i--) {
1668  for_each_value_helper<(d >= 0 ? d - 1 : -1), innermost_strides_are_one>(f, t, ptrs...);
1669  if (d == 0 && innermost_strides_are_one) {
1670  // It helps with auto-vectorization to statically
1671  // know the addresses are one apart in memory.
1672  increment_ptrs((&ptrs)...);
1673  } else {
1674  advance_ptrs(t[d].stride, (&ptrs)...);
1675  }
1676  }
1677  }
1678  }
1679 
1680  template<bool innermost_strides_are_one, typename Fn, typename... Ptrs>
1681  static void for_each_value_helper(Fn &&f, int d, const for_each_value_task_dim<sizeof...(Ptrs)> *t, Ptrs... ptrs) {
1682  // When we hit a low dimensionality, switch from runtime
1683  // recursion to template recursion.
1684  if (d == -1) {
1685  for_each_value_helper<-1, innermost_strides_are_one>(f, t, ptrs...);
1686  } else if (d == 0) {
1687  for_each_value_helper<0, innermost_strides_are_one>(f, t, ptrs...);
1688  } else if (d == 1) {
1689  for_each_value_helper<1, innermost_strides_are_one>(f, t, ptrs...);
1690  } else if (d == 2) {
1691  for_each_value_helper<2, innermost_strides_are_one>(f, t, ptrs...);
1692  } else {
1693  for (int i = t[d].extent; i != 0; i--) {
1694  for_each_value_helper<innermost_strides_are_one>(f, d-1, t, ptrs...);
1695  advance_ptrs(t[d].stride, (&ptrs)...);
1696  }
1697  }
1698  }
1699  // @}
1700 
1701 public:
1702  /** Call a function on every value in the buffer, and the
1703  * corresponding values in some number of other buffers of the
1704  * same size. The function should take a reference, const
1705  * reference, or value of the correct type for each buffer. This
1706  * effectively lifts a function of scalars to an element-wise
1707  * function of buffers. This produces code that the compiler can
1708  * autovectorize. This is slightly cheaper than for_each_element,
1709  * because it does not need to track the coordinates. */
1710  template<typename Fn, typename ...Args, int N = sizeof...(Args) + 1>
1711  void for_each_value(Fn &&f, Args... other_buffers) {
1712  for_each_value_task_dim<N> *t =
1713  (for_each_value_task_dim<N> *)HALIDE_ALLOCA((dimensions()+1) * sizeof(for_each_value_task_dim<N>));
1714  for (int i = 0; i <= dimensions(); i++) {
1715  for (int j = 0; j < N; j++) {
1716  t[i].stride[j] = 0;
1717  }
1718  t[i].extent = 1;
1719  }
1720 
1721  for (int i = 0; i < dimensions(); i++) {
1722  extract_strides(i, t[i].stride, this, &other_buffers...);
1723  t[i].extent = dim(i).extent();
1724  // Order the dimensions by stride, so that the traversal is cache-coherent.
1725  for (int j = i; j > 0 && t[j].stride[0] < t[j-1].stride[0]; j--) {
1726  std::swap(t[j], t[j-1]);
1727  }
1728  }
1729 
1730  // flatten dimensions where possible to make a larger inner
1731  // loop for autovectorization.
1732  int d = dimensions();
1733  for (int i = 1; i < d; i++) {
1734  bool flat = true;
1735  for (int j = 0; j < N; j++) {
1736  flat = flat && t[i-1].stride[j] * t[i-1].extent == t[i].stride[j];
1737  }
1738  if (flat) {
1739  t[i-1].extent *= t[i].extent;
1740  for (int j = i; j < dimensions(); j++) {
1741  t[j] = t[j+1];
1742  }
1743  i--;
1744  d--;
1745  }
1746  }
1747 
1748  bool innermost_strides_are_one = false;
1749  if (dimensions() > 0) {
1750  innermost_strides_are_one = true;
1751  for (int j = 0; j < N; j++) {
1752  innermost_strides_are_one &= t[0].stride[j] == 1;
1753  }
1754  }
1755 
1756  if (innermost_strides_are_one) {
1757  for_each_value_helper<true>(f, dimensions() - 1, t, begin(), (other_buffers.begin())...);
1758  } else {
1759  for_each_value_helper<false>(f, dimensions() - 1, t, begin(), (other_buffers.begin())...);
1760  }
1761  }
1762 
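 // For illustration only: a sketch of lifting a scalar operation to an
 // element-wise operation over two same-shaped buffers. The names and
 // sizes below are hypothetical; the first argument passed to the
 // callable refers to this buffer's value, the rest to the other buffers'.
 //
 // \code
 // Buffer<float> a(100, 100), b(100, 100);
 // a.fill(2.0f);
 // b.for_each_value([](float &dst, float src) { dst = src * src; }, a);
 // \endcode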
1763 private:
1764 
1765  // Helper functions for for_each_element
1766  struct for_each_element_task_dim {
1767  int min, max;
1768  };
1769 
1770  /** If f is callable with this many args, call it. The first
1771  * argument is just to make the overloads distinct. Actual
1772  * overload selection is done using the enable_if. */
1773  template<typename Fn,
1774  typename ...Args,
1775  typename = decltype(std::declval<Fn>()(std::declval<Args>()...))>
1777  static void for_each_element_variadic(int, int, const for_each_element_task_dim *, Fn &&f, Args... args) {
1778  f(args...);
1779  }
1780 
1781  /** If the above overload is impossible, we add an outer loop over
1782  * an additional argument and try again. */
1783  template<typename Fn,
1784  typename ...Args>
1786  static void for_each_element_variadic(double, int d, const for_each_element_task_dim *t, Fn &&f, Args... args) {
1787  for (int i = t[d].min; i <= t[d].max; i++) {
1788  for_each_element_variadic(0, d - 1, t, std::forward<Fn>(f), i, args...);
1789  }
1790  }
1791 
1792  /** Determine the minimum number of arguments a callable can take
1793  * using the same trick. */
1794  template<typename Fn,
1795  typename ...Args,
1796  typename = decltype(std::declval<Fn>()(std::declval<Args>()...))>
1798  static int num_args(int, Fn &&, Args...) {
1799  return (int)(sizeof...(Args));
1800  }
1801 
1802  /** The recursive version is only enabled up to a recursion limit
1803  * of 256. This catches callables that aren't callable with any
1804  * number of ints. */
1805  template<typename Fn,
1806  typename ...Args>
1808  static int num_args(double, Fn &&f, Args... args) {
1809  static_assert(sizeof...(args) <= 256,
1810  "Callable passed to for_each_element must accept either a const int *,"
1811  " or up to 256 ints. No such operator found. Expect infinite template recursion.");
1812  return num_args(0, std::forward<Fn>(f), 0, args...);
1813  }
1814 
1815  /** A version where the callable takes a position array instead,
1816  * with compile-time recursion on the dimensionality. This
1817  * overload is preferred to the one below using the same int vs
1818  * double trick as above, but is disabled via std::enable_if once
1819  * d hits -1. */
1820  template<int d,
1821  typename Fn,
1822  typename = typename std::enable_if<(d >= 0)>::type>
1824  static void for_each_element_array_helper(int, const for_each_element_task_dim *t, Fn &&f, int *pos) {
1825  for (pos[d] = t[d].min; pos[d] <= t[d].max; pos[d]++) {
1826  for_each_element_array_helper<d - 1>(0, t, std::forward<Fn>(f), pos);
1827  }
1828  }
1829 
1830  /** Base case for recursion above. */
1831  template<int d,
1832  typename Fn,
1833  typename = typename std::enable_if<(d < 0)>::type>
1835  static void for_each_element_array_helper(double, const for_each_element_task_dim *t, Fn &&f, int *pos) {
1836  f(pos);
1837  }
1838 
1839  /** A run-time-recursive version (instead of
1840  * compile-time-recursive) that requires the callable to take a
1841  * pointer to a position array instead. Dispatches to the
1842  * compile-time-recursive version once the dimensionality gets
1843  * small. */
1844  template<typename Fn>
1845  static void for_each_element_array(int d, const for_each_element_task_dim *t, Fn &&f, int *pos) {
1846  if (d == -1) {
1847  f(pos);
1848  } else if (d == 0) {
1849  // Once the dimensionality gets small enough, dispatch to
1850  // a compile-time-recursive version for better codegen of
1851  // the inner loops.
1852  for_each_element_array_helper<0, Fn>(0, t, std::forward<Fn>(f), pos);
1853  } else if (d == 1) {
1854  for_each_element_array_helper<1, Fn>(0, t, std::forward<Fn>(f), pos);
1855  } else if (d == 2) {
1856  for_each_element_array_helper<2, Fn>(0, t, std::forward<Fn>(f), pos);
1857  } else if (d == 3) {
1858  for_each_element_array_helper<3, Fn>(0, t, std::forward<Fn>(f), pos);
1859  } else {
1860  for (pos[d] = t[d].min; pos[d] <= t[d].max; pos[d]++) {
1861  for_each_element_array(d - 1, t, std::forward<Fn>(f), pos);
1862  }
1863  }
1864  }
1865 
1866  /** We now have two overloads for for_each_element. This one
1867  * triggers if the callable takes a const int *.
1868  */
1869  template<typename Fn,
1870  typename = decltype(std::declval<Fn>()((const int *)nullptr))>
1871  static void for_each_element(int, int dims, const for_each_element_task_dim *t, Fn &&f, int check = 0) {
1872  int *pos = (int *)HALIDE_ALLOCA(dims * sizeof(int));
1873  for_each_element_array(dims - 1, t, std::forward<Fn>(f), pos);
1874  }
1875 
1876  /** This one triggers otherwise. It treats the callable as
1877  * something that takes some number of ints. */
1878  template<typename Fn>
1880  static void for_each_element(double, int dims, const for_each_element_task_dim *t, Fn &&f) {
1881  int args = num_args(0, std::forward<Fn>(f));
1882  assert(dims >= args);
1883  for_each_element_variadic(0, args - 1, t, std::forward<Fn>(f));
1884  }
1885 public:
1886 
1887  /** Call a function at each site in a buffer. This is likely to be
1888  * much slower than using Halide code to populate a buffer, but is
1889  * convenient for tests. If the function has more arguments than the
1890  * buffer has dimensions, the remaining arguments will be zero. If it
1891  * has fewer arguments than the buffer has dimensions then the last
1892  * few dimensions of the buffer are not iterated over. For example,
1893  * the following code exploits this to set a floating point RGB image
1894  * to red:
1895 
1896  \code
1897  Buffer<float, 3> im(100, 100, 3);
1898  im.for_each_element([&](int x, int y) {
1899  im(x, y, 0) = 1.0f;
1900  im(x, y, 1) = 0.0f;
1901  im(x, y, 2) = 0.0f;
1902  });
1903  \endcode
1904 
1905  * The compiled code is equivalent to writing a nested for loop,
1906  * and compilers are capable of optimizing it in the same way.
1907  *
1908  * If the callable can be called with an int * as the sole argument,
1909  * that version is called instead. Each location in the buffer is
1910  * passed to it in a coordinate array. This version is higher-overhead
1911  * than the variadic version, but is useful for writing generic code
1912  * that accepts buffers of arbitrary dimensionality. For example, the
1913  * following sets the value at all sites in an arbitrary-dimensional
1914  * buffer to their first coordinate:
1915 
1916  \code
1917  im.for_each_element([&](const int *pos) {im(pos) = pos[0];});
1918  \endcode
1919 
1920  * It is also possible to use for_each_element to iterate over entire
1921  * rows or columns by cropping the buffer to a single column or row
1922  * respectively and iterating over elements of the result. For example,
1923  * to set the diagonal of the image to 1 by iterating over the columns:
1924 
1925  \code
1926  Buffer<float, 3> im(100, 100, 3);
1927  im.sliced(1, 0).for_each_element([&](int x, int c) {
1928  im(x, x, c) = 1.0f;
1929  });
1930  \endcode
1931 
1932  * Or, assuming the memory layout is known to be dense per row, one can
1933  * memset each row of an image like so:
1934 
1935  \code
1936  Buffer<float, 3> im(100, 100, 3);
1937  im.sliced(0, 0).for_each_element([&](int y, int c) {
1938  memset(&im(0, y, c), 0, sizeof(float) * im.width());
1939  });
1940  \endcode
1941 
1942  */
1943  template<typename Fn>
1944  void for_each_element(Fn &&f) const {
1945  for_each_element_task_dim *t =
1946  (for_each_element_task_dim *)HALIDE_ALLOCA(dimensions() * sizeof(for_each_element_task_dim));
1947  for (int i = 0; i < dimensions(); i++) {
1948  t[i].min = dim(i).min();
1949  t[i].max = dim(i).max();
1950  }
1951  for_each_element(0, dimensions(), t, std::forward<Fn>(f));
1952  }
1953 
1954 private:
1955  template<typename Fn>
1956  struct FillHelper {
1957  Fn f;
1958  Buffer<T, D> *buf;
1959 
1960  template<typename... Args,
1961  typename = decltype(std::declval<Fn>()(std::declval<Args>()...))>
1962  void operator()(Args... args) {
1963  (*buf)(args...) = f(args...);
1964  }
1965 
1966  FillHelper(Fn &&f, Buffer<T, D> *buf) : f(std::forward<Fn>(f)), buf(buf) {}
1967  };
1968 
1969 public:
1970  /** Fill a buffer by evaluating a callable at every site. The
1971  * callable should look much like a callable passed to
1972  * for_each_element, but it should return the value that should be
1973  * stored to the coordinate corresponding to the arguments. */
1974  template<typename Fn,
1975  typename = typename std::enable_if<!std::is_arithmetic<typename std::decay<Fn>::type>::value>::type>
1976  void fill(Fn &&f) {
1977  // We'll go via for_each_element. We need a variadic wrapper.
1978  FillHelper<Fn> wrapper(std::forward<Fn>(f), this);
1979  for_each_element(wrapper);
1980  }
1981 
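 // For illustration only: a sketch of filling a buffer from a callable,
 // here a simple gradient. The buffer name and sizes are hypothetical.
 //
 // \code
 // Buffer<int> im(16, 16);
 // im.fill([](int x, int y) { return x + y; });
 // \endcode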
1982  /** Check if an input buffer passed to an extern stage is a bounds
1983  * query. Compared to doing the host pointer check directly,
1984  * this both adds clarity to the code and will facilitate moving to
1985  * another representation for bounds query arguments. */
1986  bool is_bounds_query() {
1987  return buf.is_bounds_query();
1988  }
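 // For illustration only: a sketch of how an extern stage might respond
 // to a bounds query. The function name, element type, and the choice of
 // required region below are hypothetical.
 //
 // \code
 // extern "C" int my_extern_stage(halide_buffer_t *in, halide_buffer_t *out) {
 //     Halide::Runtime::Buffer<float> input(*in);
 //     if (input.is_bounds_query()) {
 //         // Report the region of 'in' needed: here, the same as 'out'.
 //         for (int d = 0; d < out->dimensions; d++) {
 //             in->dim[d].min = out->dim[d].min;
 //             in->dim[d].extent = out->dim[d].extent;
 //         }
 //         return 0;
 //     }
 //     // ... otherwise produce 'out' from 'in' ...
 //     return 0;
 // }
 // \endcode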
1989 
1990 };
1991 
1992 } // namespace Runtime
1993 } // namespace Halide
1994 
1995 #undef HALIDE_ALLOCA
1996 
1997  #endif // HALIDE_RUNTIME_BUFFER_H