6 #ifndef HALIDE_RUNTIME_BUFFER_H
7 #define HALIDE_RUNTIME_BUFFER_H
20 #include <AvailabilityVersions.h>
21 #include <TargetConditionals.h>
24 #if defined(__has_feature)
25 #if __has_feature(memory_sanitizer)
26 #include <sanitizer/msan_interface.h>
34 #define HALIDE_ALLOCA _alloca
36 #define HALIDE_ALLOCA __builtin_alloca
40 #if __GNUC__ == 5 && __GNUC_MINOR__ == 1
41 #pragma GCC diagnostic ignored "-Warray-bounds"
44 #ifndef HALIDE_RUNTIME_BUFFER_CHECK_INDICES
45 #define HALIDE_RUNTIME_BUFFER_CHECK_INDICES 0
48 #ifndef HALIDE_RUNTIME_BUFFER_ALLOCATION_ALIGNMENT
52 #define HALIDE_RUNTIME_BUFFER_ALLOCATION_ALIGNMENT 128
56 "HALIDE_RUNTIME_BUFFER_ALLOCATION_ALIGNMENT must be a power of 2.");
64 #ifndef HALIDE_RUNTIME_BUFFER_USE_ALIGNED_ALLOC
71 #define HALIDE_RUNTIME_BUFFER_USE_ALIGNED_ALLOC 0
73 #elif defined(__ANDROID_API__) && __ANDROID_API__ < 28
76 #define HALIDE_RUNTIME_BUFFER_USE_ALIGNED_ALLOC 0
78 #elif defined(__APPLE__)
80 #if TARGET_OS_OSX && (__MAC_OS_X_VERSION_MIN_REQUIRED < __MAC_10_15)
83 #define HALIDE_RUNTIME_BUFFER_USE_ALIGNED_ALLOC 0
85 #elif TARGET_OS_IPHONE && (__IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_14_0)
88 #define HALIDE_RUNTIME_BUFFER_USE_ALIGNED_ALLOC 0
93 #define HALIDE_RUNTIME_BUFFER_USE_ALIGNED_ALLOC 1
99 #if defined(__GLIBCXX__) && !defined(_GLIBCXX_HAVE_ALIGNED_ALLOC)
102 #define HALIDE_RUNTIME_BUFFER_USE_ALIGNED_ALLOC 0
107 #define HALIDE_RUNTIME_BUFFER_USE_ALIGNED_ALLOC 1
114 #endif // HALIDE_RUNTIME_BUFFER_USE_ALIGNED_ALLOC
120 template<
typename T,
int Dims,
int InClassDimStorage>
125 template<
typename... Args>
131 template<
typename T,
typename... Args>
133 static const bool value = std::is_convertible<T, int>::value &&
AllInts<Args...>::value;
139 template<
typename... Args>
140 struct AllInts<float, Args...> : std::false_type {};
142 template<
typename... Args>
143 struct AllInts<double, Args...> : std::false_type {};
147 template<
typename Container>
213 template<
typename T = void,
232 static const bool T_is_void = std::is_same<typename std::remove_const<T>::type,
void>::value;
235 template<
typename T2>
240 using not_void_T =
typename std::conditional<T_is_void,
241 add_const_if_T_is_const<uint8_t>,
245 using not_const_T =
typename std::remove_const<T>::type;
251 using storage_T =
typename std::conditional<std::is_pointer<T>::value,
uint64_t, not_void_T>::type;
255 static constexpr
bool has_static_halide_type = !T_is_void;
260 return halide_type_of<typename std::remove_cv<not_void_T>::type>();
265 return alloc !=
nullptr;
268 static constexpr
bool has_static_dimensions = (Dims !=
AnyDims);
276 static_assert(!has_static_dimensions || static_dimensions() >= 0);
280 void incref()
const {
281 if (owns_host_memory()) {
285 if (!dev_ref_count) {
291 dev_ref_count =
new DeviceRefCount;
293 dev_ref_count->
count++;
299 struct DevRefCountCropped : DeviceRefCount {
300 Buffer<T, Dims, InClassDimStorage> cropped_from;
301 DevRefCountCropped(
const Buffer<T, Dims, InClassDimStorage> &cropped_from)
302 : cropped_from(cropped_from) {
308 void crop_from(
const Buffer<T, Dims, InClassDimStorage> &cropped_from) {
309 assert(dev_ref_count ==
nullptr);
310 dev_ref_count =
new DevRefCountCropped(cropped_from);
315 void decref(
bool device_only =
false) {
316 if (owns_host_memory() && !device_only) {
318 if (new_count == 0) {
320 alloc->~AllocationHeader();
325 set_host_dirty(
false);
329 new_count = --(dev_ref_count->
count);
331 if (new_count == 0) {
333 assert(!(alloc && device_dirty()) &&
334 "Implicitly freeing a dirty device allocation while a host allocation still lives. "
335 "Call device_free explicitly if you want to drop dirty device-side data. "
336 "Call copy_to_host explicitly if you want the data copied to the host allocation "
337 "before the device allocation is freed.");
340 result = buf.device_interface->detach_native(
nullptr, &buf);
342 result = buf.device_interface->device_and_host_free(
nullptr, &buf);
344 result = buf.device_interface->device_release_crop(
nullptr, &buf);
346 result = buf.device_interface->device_free(
nullptr, &buf);
354 delete (DevRefCountCropped *)dev_ref_count;
356 delete dev_ref_count;
360 dev_ref_count =
nullptr;
362 buf.device_interface =
nullptr;
365 void free_shape_storage() {
366 if (buf.dim != shape) {
372 template<
int DimsSpecified>
373 void make_static_shape_storage() {
374 static_assert(Dims ==
AnyDims || Dims == DimsSpecified,
375 "Number of arguments to Buffer() does not match static dimensionality");
376 buf.dimensions = DimsSpecified;
377 if constexpr (Dims ==
AnyDims) {
378 if constexpr (DimsSpecified <= InClassDimStorage) {
381 static_assert(DimsSpecified >= 1);
385 static_assert(InClassDimStorage >= Dims);
390 void make_shape_storage(
const int dimensions) {
391 if (Dims !=
AnyDims && Dims != dimensions) {
392 assert(
false &&
"Number of arguments to Buffer() does not match static dimensionality");
396 buf.dimensions = dimensions;
397 buf.dim = (dimensions <= InClassDimStorage) ? shape :
new halide_dimension_t[dimensions];
406 template<
typename T2,
int D2,
int S2>
407 void move_shape_from(Buffer<T2, D2, S2> &&other) {
408 if (other.shape == other.buf.dim) {
409 copy_shape_from(other.buf);
411 buf.dim = other.buf.dim;
412 other.buf.dim =
nullptr;
422 dev_ref_count =
new DeviceRefCount;
428 void initialize_shape(
const int *sizes) {
429 for (
int i = 0; i < buf.dimensions; i++) {
431 buf.dim[i].extent = sizes[i];
433 buf.dim[i].stride = 1;
435 buf.dim[i].stride = buf.dim[i - 1].stride * buf.dim[i - 1].extent;
441 void initialize_shape(
const std::vector<int> &sizes) {
442 assert(buf.dimensions == (
int)sizes.size());
443 initialize_shape(sizes.data());
447 template<
typename Array,
size_t N>
448 void initialize_shape_from_array_shape(
int next, Array (&vals)[N]) {
449 buf.dim[next].min = 0;
450 buf.dim[next].extent = (int)N;
452 buf.dim[next].stride = 1;
454 initialize_shape_from_array_shape(next - 1, vals[0]);
455 buf.dim[next].stride = buf.dim[next - 1].stride * buf.dim[next - 1].extent;
// Base case for the recursion above: a non-array (scalar) element
// terminates the shape — nothing left to fill in.
template<typename T2>
void initialize_shape_from_array_shape(int, const T2 &) {
}
// Count the rank of a nested C array type: one per array level,
// recursing into the element type.
template<typename Array, size_t N>
static int dimensionality_of_array(Array (&vals)[N]) {
    return dimensionality_of_array(vals[0]) + 1;
}
// Base case: a scalar has rank zero.
template<typename T2>
static int dimensionality_of_array(const T2 &) {
    return 0;
}
476 template<
typename Array,
size_t N>
477 static halide_type_t scalar_type_of_array(Array (&vals)[N]) {
478 return scalar_type_of_array(vals[0]);
481 template<
typename T2>
483 return halide_type_of<typename std::remove_cv<T2>::type>();
487 void crop_host(
int d,
int min,
int extent) {
488 assert(dim(d).
min() <=
min);
489 assert(dim(d).
max() >=
min + extent - 1);
491 if (buf.host !=
nullptr) {
492 buf.host += (shift * dim(d).stride()) * type().bytes();
494 buf.dim[d].min =
min;
495 buf.dim[d].extent = extent;
499 void crop_host(
const std::vector<std::pair<int, int>> &rect) {
501 int limit = (int)rect.size();
502 assert(limit <= dimensions());
503 for (
int i = 0; i < limit; i++) {
504 crop_host(i, rect[i].first, rect[i].second);
508 void complete_device_crop(Buffer<T, Dims, InClassDimStorage> &result_host_cropped)
const {
509 assert(buf.device_interface !=
nullptr);
511 const Buffer<T, Dims, InClassDimStorage> *cropped_from =
this;
517 cropped_from = &((DevRefCountCropped *)dev_ref_count)->cropped_from;
519 result_host_cropped.crop_from(*cropped_from);
524 void slice_host(
int d,
int pos) {
525 static_assert(Dims ==
AnyDims);
526 assert(dimensions() > 0);
527 assert(d >= 0 && d < dimensions());
528 assert(pos >= dim(d).
min() && pos <= dim(d).
max());
531 if (buf.host !=
nullptr) {
532 buf.host += (shift * buf.dim[d].stride) * type().bytes();
534 for (
int i = d; i < buf.dimensions; i++) {
535 buf.dim[i] = buf.dim[i + 1];
537 buf.dim[buf.dimensions] = {0, 0, 0};
540 void complete_device_slice(Buffer<T, AnyDims, InClassDimStorage> &result_host_sliced,
int d,
int pos)
const {
541 assert(buf.device_interface !=
nullptr);
542 if (buf.device_interface->device_slice(
nullptr, &this->buf, d, pos, &result_host_sliced.buf) ==
halide_error_code_success) {
543 const Buffer<T, Dims, InClassDimStorage> *sliced_from =
this;
549 sliced_from = &((DevRefCountCropped *)dev_ref_count)->cropped_from;
552 result_host_sliced.crop_from(*sliced_from);
582 return min() + extent() - 1;
593 return val != other.
val;
608 return {
min() + extent()};
618 assert(i >= 0 && i < this->dimensions());
619 return Dimension(buf.dim[i]);
628 return dim(i).extent();
631 return dim(i).stride();
638 return buf.number_of_elements();
643 if constexpr (has_static_dimensions) {
646 return buf.dimensions;
658 assert(buf.host !=
nullptr);
659 return (T *)buf.begin();
664 assert(buf.host !=
nullptr);
665 return (T *)buf.end();
670 return buf.size_in_bytes();
682 buf.type = static_halide_type();
685 constexpr
int buf_dimensions = (Dims ==
AnyDims) ? 0 : Dims;
686 make_static_shape_storage<buf_dimensions>();
692 assert(T_is_void || buf.type == static_halide_type());
693 initialize_from_buffer(buf, ownership);
697 template<
typename T2,
int D2,
int S2>
701 template<
typename T2,
int D2,
int S2>
702 static void static_assert_can_convert_from() {
703 static_assert((!std::is_const<T2>::value || std::is_const<T>::value),
704 "Can't convert from a Buffer<const T> to a Buffer<T>");
705 static_assert(std::is_same<
typename std::remove_const<T>::type,
706 typename std::remove_const<T2>::type>::value ||
708 "type mismatch constructing Buffer");
710 "Can't convert from a Buffer with static dimensionality to a Buffer with different static dimensionality");
717 template<
typename T2,
int D2,
int S2>
719 static_assert_can_convert_from<T2, D2, S2>();
721 if (other.
type() != static_halide_type()) {
735 template<
typename T2,
int D2,
int S2>
740 static_assert_can_convert_from<T2, D2, S2>();
741 assert(can_convert_from(other));
749 dev_ref_count = other.dev_ref_count;
750 copy_shape_from(other.buf);
759 template<
typename T2,
int D2,
int S2>
763 assert_can_convert_from(other);
765 dev_ref_count = other.dev_ref_count;
766 copy_shape_from(other.buf);
773 dev_ref_count(other.dev_ref_count) {
774 other.dev_ref_count =
nullptr;
775 other.alloc =
nullptr;
783 template<
typename T2,
int D2,
int S2>
787 dev_ref_count(other.dev_ref_count) {
788 assert_can_convert_from(other);
789 other.dev_ref_count =
nullptr;
790 other.alloc =
nullptr;
798 template<
typename T2,
int D2,
int S2>
800 if ((
const void *)
this == (
const void *)&other) {
803 assert_can_convert_from(other);
806 dev_ref_count = other.dev_ref_count;
808 free_shape_storage();
810 copy_shape_from(other.buf);
817 if ((
const void *)
this == (
const void *)&other) {
822 dev_ref_count = other.dev_ref_count;
824 free_shape_storage();
826 copy_shape_from(other.buf);
833 template<
typename T2,
int D2,
int S2>
835 assert_can_convert_from(other);
838 other.alloc =
nullptr;
839 dev_ref_count = other.dev_ref_count;
840 other.dev_ref_count =
nullptr;
841 free_shape_storage();
852 other.alloc =
nullptr;
853 dev_ref_count = other.dev_ref_count;
854 other.dev_ref_count =
nullptr;
855 free_shape_storage();
864 size_t size = type().bytes();
865 for (
int i = 0; i < dimensions(); i++) {
866 size *= dim(i).extent();
869 size = (size << 1) >> 1;
870 for (
int i = 0; i < dimensions(); i++) {
871 size /= dim(i).extent();
873 assert(size == (
size_t)type().bytes() &&
"Error: Overflow computing total size of buffer.");
878 void allocate(
void *(*allocate_fn)(
size_t) =
nullptr,
879 void (*deallocate_fn)(
void *) =
nullptr) {
888 const auto align_up = [=](
size_t value) ->
size_t {
889 return (value + alignment - 1) & ~(alignment - 1);
892 size_t size = size_in_bytes();
894 #if HALIDE_RUNTIME_BUFFER_USE_ALIGNED_ALLOC
896 if (!allocate_fn && !deallocate_fn) {
902 void *alloc_storage = ::aligned_alloc(alignment,
align_up(size) + alignment);
913 if (!deallocate_fn) {
914 deallocate_fn =
free;
923 const size_t requested_size =
align_up(size + alignment +
925 (
int)
sizeof(std::max_align_t)));
926 void *alloc_storage = allocate_fn(requested_size);
951 template<
typename... Args,
952 typename =
typename std::enable_if<
AllInts<Args...>::value>::type>
955 assert(static_halide_type() == t);
957 int extents[] = {first, (int)rest...};
959 constexpr
int buf_dimensions = 1 + (int)(
sizeof...(rest));
960 make_static_shape_storage<buf_dimensions>();
961 initialize_shape(extents);
975 static_assert(!T_is_void,
976 "To construct an Buffer<void>, pass a halide_type_t as the first argument to the constructor");
977 int extents[] = {first};
978 buf.type = static_halide_type();
979 constexpr
int buf_dimensions = 1;
980 make_static_shape_storage<buf_dimensions>();
981 initialize_shape(extents);
988 template<
typename... Args,
989 typename =
typename std::enable_if<
AllInts<Args...>::value>::type>
990 Buffer(
int first,
int second, Args... rest) {
991 static_assert(!T_is_void,
992 "To construct an Buffer<void>, pass a halide_type_t as the first argument to the constructor");
993 int extents[] = {first, second, (int)rest...};
994 buf.type = static_halide_type();
995 constexpr
int buf_dimensions = 2 + (int)(
sizeof...(rest));
996 make_static_shape_storage<buf_dimensions>();
997 initialize_shape(extents);
1008 assert(static_halide_type() == t);
1012 make_shape_storage((
int)sizes.size());
1013 initialize_shape(sizes);
1021 explicit Buffer(
const std::vector<int> &sizes)
1022 :
Buffer(static_halide_type(), sizes) {
// Permute `sizes` according to `order`: element i of the result is
// sizes[order[i]]. Uses at() so a bad index in `order` throws rather
// than reading out of bounds. The two vectors must be the same length.
static std::vector<int> make_ordered_sizes(const std::vector<int> &sizes, const std::vector<int> &order) {
    assert(order.size() == sizes.size());
    std::vector<int> ordered_sizes(sizes.size());
    for (size_t i = 0; i < sizes.size(); ++i) {
        ordered_sizes[i] = sizes.at(order[i]);
    }
    return ordered_sizes;
}
1042 :
Buffer(t, make_ordered_sizes(sizes, storage_order)) {
1043 transpose(storage_order);
1046 Buffer(
const std::vector<int> &sizes,
const std::vector<int> &storage_order)
1047 :
Buffer(static_halide_type(), sizes, storage_order) {
1052 template<
typename Array,
size_t N>
1054 const int buf_dimensions = dimensionality_of_array(vals);
1055 buf.type = scalar_type_of_array(vals);
1057 make_shape_storage(buf_dimensions);
1058 initialize_shape_from_array_shape(buf.dimensions - 1, vals);
1065 template<
typename... Args,
1066 typename =
typename std::enable_if<
AllInts<Args...>::value>::type>
1069 assert(static_halide_type() == t);
1071 int extents[] = {first, (int)rest...};
1073 buf.host = (
uint8_t *)
const_cast<void *
>(data);
1074 constexpr
int buf_dimensions = 1 + (int)(
sizeof...(rest));
1075 make_static_shape_storage<buf_dimensions>();
1076 initialize_shape(extents);
1082 template<
typename... Args,
1083 typename =
typename std::enable_if<
AllInts<Args...>::value>::type>
1084 explicit Buffer(T *data,
int first, Args &&...rest) {
1085 int extents[] = {first, (int)rest...};
1086 buf.type = static_halide_type();
1087 buf.host = (
uint8_t *)
const_cast<typename std::remove_const<T>::type *
>(data);
1088 constexpr
int buf_dimensions = 1 + (int)(
sizeof...(rest));
1089 make_static_shape_storage<buf_dimensions>();
1090 initialize_shape(extents);
1097 explicit Buffer(T *data,
const std::vector<int> &sizes) {
1098 buf.type = static_halide_type();
1099 buf.host = (
uint8_t *)
const_cast<typename std::remove_const<T>::type *
>(data);
1100 make_shape_storage((
int)sizes.size());
1101 initialize_shape(sizes);
1110 assert(static_halide_type() == t);
1113 buf.host = (
uint8_t *)
const_cast<void *
>(data);
1114 make_shape_storage((
int)sizes.size());
1115 initialize_shape(sizes);
1123 assert(static_halide_type() == t);
1126 buf.host = (
uint8_t *)
const_cast<void *
>(data);
1127 make_shape_storage(d);
1128 for (
int i = 0; i < d; i++) {
1129 buf.dim[i] = shape[i];
1137 const std::vector<halide_dimension_t> &shape)
1138 :
Buffer(t, data, (int)shape.size(), shape.data()) {
1145 buf.type = static_halide_type();
1146 buf.host = (
uint8_t *)
const_cast<typename std::remove_const<T>::type *
>(data);
1147 make_shape_storage(d);
1148 for (
int i = 0; i < d; i++) {
1149 buf.dim[i] = shape[i];
1156 explicit inline Buffer(T *data,
const std::vector<halide_dimension_t> &shape)
1157 :
Buffer(data, (int)shape.size(), shape.data()) {
1165 free_shape_storage();
1192 template<
typename T2,
int D2 = Dims>
1205 template<
typename T2,
int D2 = Dims>
1218 template<
typename T2,
int D2 = Dims>
1247 template<typename T2 = T, typename = typename std::enable_if<!std::is_const<T2>::value>::type>
1254 template<
typename TVoid,
1256 typename =
typename std::enable_if<std::is_same<TVoid, void>::value &&
1257 !std::is_void<T2>::value &&
1258 !std::is_const<T2>::value>::type>
1260 return as<TVoid, Dims>();
1265 template<
typename TVoid,
1267 typename =
typename std::enable_if<std::is_same<TVoid, void>::value &&
1268 !std::is_void<T2>::value &&
1269 std::is_const<T2>::value>::type>
1271 return as<const TVoid, Dims>();
1277 return (dimensions() > 0) ? dim(0).extent() : 1;
1280 return (dimensions() > 1) ? dim(1).extent() : 1;
1283 return (dimensions() > 2) ? dim(2).extent() : 1;
1290 return dim(0).min();
1294 return dim(0).max();
1298 return dim(1).min();
1302 return dim(1).max();
1319 void (*deallocate_fn)(
void *) =
nullptr)
const {
1321 dst.copy_from(*
this);
1330 void (*deallocate_fn)(
void *) =
nullptr)
const {
1331 static_assert(Dims ==
AnyDims || Dims == 3);
1332 assert(dimensions() == 3);
1335 dst.allocate(allocate_fn, deallocate_fn);
1336 dst.copy_from(*
this);
1344 void (*deallocate_fn)(
void *) =
nullptr)
const {
1345 std::vector<int> mins, extents;
1346 const int dims = dimensions();
1348 extents.reserve(dims);
1349 for (
int d = 0; d < dims; ++d) {
1350 mins.push_back(dim(d).
min());
1351 extents.push_back(dim(d).extent());
1355 dst.allocate(allocate_fn, deallocate_fn);
1356 dst.copy_from(*
this);
1382 template<
typename T2,
int D2,
int S2>
1384 static_assert(!std::is_const<T>::value,
"Cannot call copy_from() on a Buffer<const T>");
1385 assert(!device_dirty() &&
"Cannot call Halide::Runtime::Buffer::copy_from on a device dirty destination.");
1386 assert(!src.
device_dirty() &&
"Cannot call Halide::Runtime::Buffer::copy_from on a device dirty source.");
1391 assert(src.
dimensions() == dst.dimensions());
1394 const int d = dimensions();
1395 for (
int i = 0; i < d; i++) {
1396 int min_coord =
std::max(dst.dim(i).min(), src.
dim(i).min());
1397 int max_coord =
std::min(dst.dim(i).max(), src.
dim(i).max());
1398 if (max_coord < min_coord) {
1402 dst.crop(i, min_coord, max_coord - min_coord + 1);
1403 src.
crop(i, min_coord, max_coord - min_coord + 1);
1410 if (T_is_void ? (type().bytes() == 1) : (
sizeof(not_void_T) == 1)) {
1414 typed_dst.
for_each_value([&](MemType &dst, MemType src) { dst = src; }, typed_src);
1415 }
else if (T_is_void ? (type().bytes() == 2) : (
sizeof(not_void_T) == 2)) {
1419 typed_dst.
for_each_value([&](MemType &dst, MemType src) { dst = src; }, typed_src);
1420 }
else if (T_is_void ? (type().bytes() == 4) : (
sizeof(not_void_T) == 4)) {
1424 typed_dst.
for_each_value([&](MemType &dst, MemType src) { dst = src; }, typed_src);
1425 }
else if (T_is_void ? (type().bytes() == 8) : (
sizeof(not_void_T) == 8)) {
1429 typed_dst.
for_each_value([&](MemType &dst, MemType src) { dst = src; }, typed_src);
1431 assert(
false &&
"type().bytes() must be 1, 2, 4, or 8");
1450 im.crop_host(d,
min, extent);
1451 if (buf.device_interface !=
nullptr) {
1452 complete_device_crop(im);
1465 if (buf.device_interface !=
nullptr) {
1466 *
this = cropped(d,
min, extent);
1468 crop_host(d,
min, extent);
1487 if (buf.device_interface !=
nullptr) {
1488 complete_device_crop(im);
1497 void crop(
const std::vector<std::pair<int, int>> &rect) {
1502 if (buf.device_interface !=
nullptr) {
1503 *
this = cropped(rect);
1522 assert(d >= 0 && d < this->dimensions());
1523 device_deallocate();
1524 buf.dim[d].min += delta;
1538 device_deallocate();
1540 int limit = (int)delta.size();
1541 assert(limit <= dimensions());
1542 for (
int i = 0; i < limit; i++) {
1543 translate(i, delta[i]);
1550 assert(mins.size() <=
static_cast<decltype(mins.size())
>(dimensions()));
1551 device_deallocate();
1552 for (
size_t i = 0; i < mins.size(); i++) {
1553 buf.dim[i].min = mins[i];
1557 template<
typename... Args>
1559 set_min(std::vector<int>{args...});
1566 assert(coords.size() <=
static_cast<decltype(coords.size())
>(dimensions()));
1567 for (
size_t i = 0; i < coords.size(); i++) {
1568 if (coords[i] < dim((
int)i).
min() || coords[i] > dim((
int)i).
max()) {
1575 template<
typename... Args>
1577 return contains(std::vector<int>{args...});
1599 assert(d1 >= 0 && d1 < this->dimensions());
1600 assert(d2 >= 0 && d2 < this->dimensions());
1601 std::swap(buf.dim[d1], buf.dim[d2]);
1609 assert((
int)order.size() == dimensions());
1610 if (dimensions() < 2) {
1615 std::vector<int> order_sorted = order;
1616 for (
size_t i = 1; i < order_sorted.size(); i++) {
1617 for (
size_t j = i; j > 0 && order_sorted[j - 1] > order_sorted[j]; j--) {
1618 std::swap(order_sorted[j], order_sorted[j - 1]);
1619 transpose(j, j - 1);
1636 static_assert(Dims ==
AnyDims || Dims > 0,
"Cannot slice a 0-dimensional buffer");
1637 assert(dimensions() > 0);
1646 im.slice_host(d, pos);
1647 if (buf.device_interface !=
nullptr) {
1648 complete_device_slice(im, d, pos);
1657 static_assert(Dims ==
AnyDims || Dims > 0,
"Cannot slice a 0-dimensional buffer");
1658 assert(dimensions() > 0);
1660 return sliced(d, dim(d).
min());
1669 static_assert(Dims ==
AnyDims,
"Cannot call slice() on a Buffer with static dimensionality.");
1670 assert(dimensions() > 0);
1676 if (buf.device_interface !=
nullptr) {
1677 *
this = sliced(d, pos);
1708 static_assert(Dims ==
AnyDims,
"Cannot call embed() on a Buffer with static dimensionality.");
1709 assert(d >= 0 && d <= dimensions());
1711 translate(dimensions() - 1, pos);
1712 for (
int i = dimensions() - 1; i > d; i--) {
1713 transpose(i, i - 1);
1722 static_assert(Dims ==
AnyDims,
"Cannot call add_dimension() on a Buffer with static dimensionality.");
1723 const int dims = buf.dimensions;
1725 if (buf.dim != shape) {
1728 for (
int i = 0; i < dims; i++) {
1729 new_shape[i] = buf.dim[i];
1732 buf.dim = new_shape;
1733 }
else if (dims == InClassDimStorage) {
1735 make_shape_storage(buf.dimensions);
1736 for (
int i = 0; i < dims; i++) {
1737 buf.dim[i] = shape[i];
1742 buf.dim[dims] = {0, 1, 0};
1744 buf.dim[dims].stride = 1;
1746 buf.dim[dims].stride = buf.dim[dims - 1].extent * buf.dim[dims - 1].stride;
1755 buf.dim[buf.dimensions - 1].stride = s;
1764 assert((!v || !device_dirty()) &&
"Cannot set host dirty when device is already dirty. Call copy_to_host() before accessing the buffer from host.");
1765 buf.set_host_dirty(v);
1773 return buf.device_dirty();
1777 return buf.host_dirty();
1781 assert((!v || !host_dirty()) &&
"Cannot set device dirty when host is already dirty.");
1782 buf.set_device_dirty(v);
1786 if (device_dirty()) {
1787 return buf.device_interface->copy_to_host(ctx, &buf);
1794 return device_interface->
copy_to_device(ctx, &buf, device_interface);
1800 return device_interface->
device_malloc(ctx, &buf, device_interface);
1804 if (dev_ref_count) {
1806 "Can't call device_free on an unmanaged or wrapped native device handle. "
1807 "Free the source allocation or call device_detach_native instead.");
1809 assert(dev_ref_count->
count == 1 &&
1810 "Multiple Halide::Runtime::Buffer objects share this device "
1811 "allocation. Freeing it would create dangling references. "
1812 "Don't call device_free on Halide buffers that you have copied or "
1813 "passed by value.");
1816 if (buf.device_interface) {
1817 ret = buf.device_interface->device_free(ctx, &buf);
1819 if (dev_ref_count) {
1820 delete dev_ref_count;
1821 dev_ref_count =
nullptr;
1827 uint64_t handle,
void *ctx =
nullptr) {
1828 assert(device_interface);
1831 return device_interface->
wrap_native(ctx, &buf, handle, device_interface);
1835 assert(dev_ref_count &&
1837 "Only call device_detach_native on buffers wrapping a native "
1838 "device handle via device_wrap_native. This buffer was allocated "
1839 "using device_malloc, or is unmanaged. "
1840 "Call device_free or free the original allocation instead.");
1842 assert(dev_ref_count->
count == 1 &&
1843 "Multiple Halide::Runtime::Buffer objects share this device "
1844 "allocation. Freeing it could create dangling references. "
1845 "Don't call device_detach_native on Halide buffers that you "
1846 "have copied or passed by value.");
1848 if (buf.device_interface) {
1849 ret = buf.device_interface->detach_native(ctx, &buf);
1851 delete dev_ref_count;
1852 dev_ref_count =
nullptr;
1861 if (dev_ref_count) {
1863 "Can't call device_and_host_free on a device handle not allocated with device_and_host_malloc. "
1864 "Free the source allocation or call device_detach_native instead.");
1866 assert(dev_ref_count->
count == 1 &&
1867 "Multiple Halide::Runtime::Buffer objects share this device "
1868 "allocation. Freeing it would create dangling references. "
1869 "Don't call device_and_host_free on Halide buffers that you have copied or "
1870 "passed by value.");
1873 if (buf.device_interface) {
1874 ret = buf.device_interface->device_and_host_free(ctx, &buf);
1876 if (dev_ref_count) {
1877 delete dev_ref_count;
1878 dev_ref_count =
nullptr;
1884 return buf.device_sync(ctx);
1888 return buf.device != 0;
1893 if (dev_ref_count ==
nullptr) {
1907 static_assert(Dims ==
AnyDims || Dims == 3,
"make_interleaved() must be called on a Buffer that can represent 3 dimensions.");
1923 return make_interleaved(static_halide_type(), width, height, channels);
1929 static_assert(Dims ==
AnyDims || Dims == 3,
"make_interleaved() must be called on a Buffer that can represent 3 dimensions.");
1938 return make_interleaved(static_halide_type(), data, width, height, channels);
1943 static_assert(Dims ==
AnyDims || Dims == 0,
"make_scalar() must be called on a Buffer that can represent 0 dimensions.");
1951 static_assert(Dims ==
AnyDims || Dims == 0,
"make_scalar() must be called on a Buffer that can represent 0 dimensions.");
1959 static_assert(Dims ==
AnyDims || Dims == 0,
"make_scalar() must be called on a Buffer that can represent 0 dimensions.");
1967 template<
typename T2,
int D2,
int S2>
1969 void *(*allocate_fn)(
size_t) =
nullptr,
1970 void (*deallocate_fn)(
void *) =
nullptr) {
1971 static_assert(Dims == D2 || Dims ==
AnyDims);
1972 const halide_type_t dst_type = T_is_void ? src.
type() : halide_type_of<typename std::remove_cv<not_void_T>::type>();
1974 allocate_fn, deallocate_fn);
1981 void *(*allocate_fn)(
size_t),
1982 void (*deallocate_fn)(
void *)) {
1984 std::vector<int> swaps;
1985 for (
int i = dimensions - 1; i > 0; i--) {
1986 for (
int j = i; j > 0; j--) {
1987 if (shape[j - 1].stride > shape[j].stride) {
1988 std::swap(shape[j - 1], shape[j]);
1996 for (
int i = 0; i < dimensions; i++) {
2005 while (!swaps.empty()) {
2006 int j = swaps.back();
2007 std::swap(shape[j - 1], shape[j]);
2013 Buffer<> dst(dst_type,
nullptr, dimensions, shape);
2014 dst.allocate(allocate_fn, deallocate_fn);
2019 template<
typename... Args>
2022 offset_of(
int d,
int first, Args... rest)
const {
2023 #if HALIDE_RUNTIME_BUFFER_CHECK_INDICES
2024 assert(first >= this->buf.
dim[d].
min);
2025 assert(first < this->buf.dim[d].min + this->buf.dim[d].extent);
2027 return offset_of(d + 1, rest...) + (
ptrdiff_t)this->buf.
dim[d].
stride * (first - this->buf.dim[d].min);
2035 template<
typename... Args>
2038 address_of(Args... args)
const {
2040 return (storage_T *)(this->buf.
host) + offset_of(0, args...) * type().bytes();
2042 return (storage_T *)(this->buf.
host) + offset_of(0, args...);
2047 ptrdiff_t offset_of(
const int *pos)
const {
2049 for (
int i = this->dimensions() - 1; i >= 0; i--) {
2050 #if HALIDE_RUNTIME_BUFFER_CHECK_INDICES
2051 assert(pos[i] >= this->buf.
dim[i].
min);
2052 assert(pos[i] < this->buf.
dim[i].
min + this->buf.dim[i].extent);
2060 storage_T *address_of(
const int *pos)
const {
2062 return (storage_T *)this->buf.
host + offset_of(pos) * type().bytes();
2064 return (storage_T *)this->buf.
host + offset_of(pos);
2071 return (T *)(this->buf.
host);
2081 template<
typename... Args,
2082 typename =
typename std::enable_if<
AllInts<Args...>::value>::type>
2084 static_assert(!T_is_void,
2085 "Cannot use operator() on Buffer<void> types");
2086 constexpr
int expected_dims = 1 + (int)(
sizeof...(rest));
2087 static_assert(Dims ==
AnyDims || Dims == expected_dims,
"Buffer with static dimensions was accessed with the wrong number of coordinates in operator()");
2088 assert(!device_dirty());
2089 return *((
const not_void_T *)(address_of(first, rest...)));
2095 static_assert(!T_is_void,
2096 "Cannot use operator() on Buffer<void> types");
2097 constexpr
int expected_dims = 0;
2098 static_assert(Dims ==
AnyDims || Dims == expected_dims,
"Buffer with static dimensions was accessed with the wrong number of coordinates in operator()");
2099 assert(!device_dirty());
2100 return *((
const not_void_T *)(data()));
2106 static_assert(!T_is_void,
2107 "Cannot use operator() on Buffer<void> types");
2108 assert(!device_dirty());
2109 return *((
const not_void_T *)(address_of(pos)));
2112 template<
typename... Args,
2113 typename =
typename std::enable_if<
AllInts<Args...>::value>::type>
2117 static_assert(!T_is_void,
2118 "Cannot use operator() on Buffer<void> types");
2119 constexpr
int expected_dims = 1 + (int)(
sizeof...(rest));
2120 static_assert(Dims ==
AnyDims || Dims == expected_dims,
"Buffer with static dimensions was accessed with the wrong number of coordinates in operator()");
2122 return *((not_void_T *)(address_of(first, rest...)));
2128 static_assert(!T_is_void,
2129 "Cannot use operator() on Buffer<void> types");
2130 constexpr
int expected_dims = 0;
2131 static_assert(Dims ==
AnyDims || Dims == expected_dims,
"Buffer with static dimensions was accessed with the wrong number of coordinates in operator()");
2133 return *((not_void_T *)(data()));
2139 static_assert(!T_is_void,
2140 "Cannot use operator() on Buffer<void> types");
2142 return *((not_void_T *)(address_of(pos)));
2148 bool all_equal =
true;
2149 for_each_element([&](
const int *pos) { all_equal &= (*this)(pos) == val; });
2155 for_each_value([=](T &v) { v = val; });
2163 struct for_each_value_task_dim {
2171 template<
typename Ptr,
typename... Ptrs>
2174 advance_ptrs(stride + 1, ptrs...);
2181 template<
typename Fn,
typename Ptr,
typename... Ptrs>
2182 HALIDE_NEVER_INLINE static void for_each_value_helper(Fn &&f,
int d,
bool innermost_strides_are_one,
2183 const for_each_value_task_dim<
sizeof...(Ptrs) + 1> *t, Ptr ptr, Ptrs... ptrs) {
2185 if (innermost_strides_are_one) {
2186 Ptr end = ptr + t[0].extent;
2187 while (ptr != end) {
2188 f(*ptr++, (*ptrs++)...);
2192 f(*ptr, (*ptrs)...);
2193 advance_ptrs(t[0].stride, ptr, ptrs...);
2198 for_each_value_helper(f, d - 1, innermost_strides_are_one, t, ptr, ptrs...);
2199 advance_ptrs(t[d].stride, ptr, ptrs...);
2206 HALIDE_NEVER_INLINE static std::pair<int, bool> for_each_value_prep(for_each_value_task_dim<N> *t,
2208 const int dimensions = buffers[0]->
dimensions;
2209 assert(dimensions > 0);
2212 for (
int i = 0; i < N; i++) {
2213 if (buffers[i]->device) {
2214 assert(buffers[i]->host &&
2215 "Buffer passed to for_each_value has device allocation but no host allocation. Call allocate() and copy_to_host() first");
2216 assert(!buffers[i]->device_dirty() &&
2217 "Buffer passed to for_each_value is dirty on device. Call copy_to_host() first");
2219 assert(buffers[i]->host &&
2220 "Buffer passed to for_each_value has no host or device allocation");
2225 for (
int i = 0; i < dimensions; i++) {
2226 for (
int j = 0; j < N; j++) {
2227 assert(buffers[j]->dimensions == dimensions);
2228 assert(buffers[j]->dim[i].extent == buffers[0]->dim[i].extent &&
2229 buffers[j]->dim[i].
min == buffers[0]->dim[i].
min);
2230 const int s = buffers[j]->
dim[i].
stride;
2233 t[i].extent = buffers[0]->
dim[i].
extent;
2238 for (
int j = i; j > 0 && t[j].stride[N - 1] < t[j - 1].stride[N - 1]; j--) {
2239 std::swap(t[j], t[j - 1]);
2246 for (
int i = 1; i < d; i++) {
2248 for (
int j = 0; j < N; j++) {
2249 flat = flat && t[i - 1].stride[j] * t[i - 1].extent == t[i].stride[j];
2252 t[i - 1].extent *= t[i].extent;
2253 for (
int j = i; j < d - 1; j++) {
2264 bool innermost_strides_are_one =
true;
2265 for (
int i = 0; i < N; i++) {
2266 innermost_strides_are_one &= (t[0].stride[i] == 1);
2269 return {d, innermost_strides_are_one};
2272 template<
typename Fn,
typename... Args,
int N =
sizeof...(Args) + 1>
2273 void for_each_value_impl(Fn &&f, Args &&...other_buffers)
const {
2274 if (dimensions() > 0) {
2275 const size_t alloc_size = dimensions() *
sizeof(for_each_value_task_dim<N>);
2276 Buffer<>::for_each_value_task_dim<N> *t =
2277 (Buffer<>::for_each_value_task_dim<N> *)
HALIDE_ALLOCA(alloc_size);
2281 auto [new_dims, innermost_strides_are_one] = Buffer<>::for_each_value_prep(t, buffers);
2283 Buffer<>::for_each_value_helper(f, new_dims - 1,
2284 innermost_strides_are_one,
2286 data(), (other_buffers.data())...);
2293 f(*data(), (*other_buffers.data())...);
2313 template<
typename Fn,
typename... Args,
int N =
sizeof...(Args) + 1>
2315 for_each_value_impl(f, std::forward<Args>(other_buffers)...);
2319 template<
typename Fn,
typename... Args,
int N =
sizeof...(Args) + 1>
2323 for_each_value_impl(f, std::forward<Args>(other_buffers)...);
2330 struct for_each_element_task_dim {
2337 template<
typename Fn,
2339 typename = decltype(std::declval<Fn>()(std::declval<Args>()...))>
2340 HALIDE_ALWAYS_INLINE static void for_each_element_variadic(
int,
int,
const for_each_element_task_dim *, Fn &&f, Args... args) {
2346 template<
typename Fn,
2348 HALIDE_ALWAYS_INLINE static void for_each_element_variadic(
double,
int d,
const for_each_element_task_dim *t, Fn &&f, Args... args) {
2349 for (
int i = t[d].
min; i <= t[d].max; i++) {
2350 for_each_element_variadic(0, d - 1, t, std::forward<Fn>(f), i, args...);
2356 template<
typename Fn,
2358 typename = decltype(std::declval<Fn>()(std::declval<Args>()...))>
2360 return (
int)(
sizeof...(Args));
2366 template<
typename Fn,
2369 static_assert(
sizeof...(args) <= 256,
2370 "Callable passed to for_each_element must accept either a const int *,"
2371 " or up to 256 ints. No such operator found. Expect infinite template recursion.");
2372 return num_args(0, std::forward<Fn>(f), 0, args...);
2382 typename =
typename std::enable_if<(d >= 0)>::type>
2383 HALIDE_ALWAYS_INLINE static void for_each_element_array_helper(
int,
const for_each_element_task_dim *t, Fn &&f,
int *pos) {
2384 for (pos[d] = t[d].
min; pos[d] <= t[d].max; pos[d]++) {
2385 for_each_element_array_helper<d - 1>(0, t, std::forward<Fn>(f), pos);
2392 typename =
typename std::enable_if<(d < 0)>::type>
2393 HALIDE_ALWAYS_INLINE static void for_each_element_array_helper(
double,
const for_each_element_task_dim *t, Fn &&f,
int *pos) {
2402 template<
typename Fn>
2403 static void for_each_element_array(
int d,
const for_each_element_task_dim *t, Fn &&f,
int *pos) {
2406 }
else if (d == 0) {
2410 for_each_element_array_helper<0, Fn>(0, t, std::forward<Fn>(f), pos);
2411 }
else if (d == 1) {
2412 for_each_element_array_helper<1, Fn>(0, t, std::forward<Fn>(f), pos);
2413 }
else if (d == 2) {
2414 for_each_element_array_helper<2, Fn>(0, t, std::forward<Fn>(f), pos);
2415 }
else if (d == 3) {
2416 for_each_element_array_helper<3, Fn>(0, t, std::forward<Fn>(f), pos);
2418 for (pos[d] = t[d].
min; pos[d] <= t[d].max; pos[d]++) {
2419 for_each_element_array(d - 1, t, std::forward<Fn>(f), pos);
2427 template<
typename Fn,
2428 typename = decltype(std::declval<Fn>()((
const int *)
nullptr))>
2429 static void for_each_element(
int,
int dims,
const for_each_element_task_dim *t, Fn &&f,
int check = 0) {
2430 const int size = dims *
sizeof(int);
2435 for_each_element_array(dims - 1, t, std::forward<Fn>(f), pos);
2440 template<
typename Fn>
2441 HALIDE_ALWAYS_INLINE static void for_each_element(
double,
int dims,
const for_each_element_task_dim *t, Fn &&f) {
2442 int args = num_args(0, std::forward<Fn>(f));
2443 assert(dims >= args);
2444 for_each_element_variadic(0, args - 1, t, std::forward<Fn>(f));
2447 template<
typename Fn>
2448 void for_each_element_impl(Fn &&f)
const {
2449 for_each_element_task_dim *t =
2450 (for_each_element_task_dim *)
HALIDE_ALLOCA(dimensions() *
sizeof(for_each_element_task_dim));
2451 for (
int i = 0; i < dimensions(); i++) {
2452 t[i].min = dim(i).min();
2453 t[i].max = dim(i).max();
2455 for_each_element(0, dimensions(), t, std::forward<Fn>(f));
2516 template<
typename Fn>
2518 for_each_element_impl(f);
2522 template<
typename Fn>
2526 for_each_element_impl(f);
2532 template<
typename Fn>
2537 template<
typename... Args,
2538 typename = decltype(std::declval<Fn>()(std::declval<Args>()...))>
2539 void operator()(Args... args) {
2540 (*buf)(args...) = f(args...);
2544 : f(std::forward<Fn>(f)), buf(buf) {
2553 template<
typename Fn,
2554 typename =
typename std::enable_if<!std::is_arithmetic<typename std::decay<Fn>::type>::value>::type>
2557 FillHelper<Fn> wrapper(std::forward<Fn>(f),
this);
2558 return for_each_element(wrapper);
2566 return buf.is_bounds_query();
2575 #if defined(__has_feature)
2576 #if __has_feature(memory_sanitizer)
2578 __msan_check_mem_is_initialized(data(), size_in_bytes());
2580 for_each_value([](T &v) { __msan_check_mem_is_initialized(&v,
sizeof(T)); ; });
2590 #undef HALIDE_ALLOCA
2592 #endif // HALIDE_RUNTIME_IMAGE_H