#include "Halide.h"
#include <stdio.h>
// Walks through a series of producer/consumer scheduling examples. Every braced
// section below defines the same two-stage pipeline -- `producer` is sin(x * y)
// and `consumer` averages a 2x2 window of the producer -- prints a banner naming
// a schedule, and then spells out a plain-C loop nest that computes the same
// averaged values into a local array.
//
// Returns 0 after printing "Success!", or -1 as soon as the final section finds
// a value that differs from the C reference by more than 0.001.
//
// NOTE(review): `x`, `y`, `yo`, `yi` and `halide_result` are used below but are
// not declared anywhere in the visible text, and `Func` is used unqualified with
// no visible using-directive. Presumably those declarations (and the schedule /
// realize calls the banners refer to) were lost -- confirm against the original
// file before relying on this code.
int main(int argc, char **argv) {
// ---- Section 1: default schedule ------------------------------------------
{
Func producer(
"producer_default"), consumer(
"consumer_default");
// The producer is a pure function of the coordinates.
producer(x, y) = sin(x * y);
// The consumer averages the producer at (x, y) and its right, lower and
// lower-right neighbours.
consumer(x, y) = (producer(x, y) +
producer(x, y + 1) +
producer(x + 1, y) +
producer(x + 1, y + 1)) / 4;
printf("\nEvaluating producer-consumer pipeline with default schedule\n");
// Plain-C version over a 4x4 output: the producer expression is evaluated
// in place at each of its four use sites, so no producer values are stored.
float result[4][4];
for (int y = 0; y < 4; y++) {
for (int x = 0; x < 4; x++) {
result[y][x] = (sin(x * y) +
sin(x * (y + 1)) +
sin((x + 1) * y) +
sin((x + 1) * (y + 1))) / 4;
}
}
printf("\n");
// Only the heading is printed here; nothing visible emits the pseudo-code
// itself. NOTE(review): presumably a loop-nest print call is missing.
printf("Pseudo-code for the schedule:\n");
printf("\n");
}
// ---- Section 2: producer.compute_root() -----------------------------------
{
Func producer(
"producer_root"), consumer(
"consumer_root");
producer(x, y) = sin(x * y);
consumer(x, y) = (producer(x, y) +
producer(x, y + 1) +
producer(x + 1, y) +
producer(x + 1, y + 1)) / 4;
printf("\nEvaluating producer.compute_root()\n");
float result[4][4];
// The producer buffer is 5x5 because a 4x4 consumer reads up to index
// x + 1 and y + 1.
float producer_storage[5][5];
// Pass 1: compute every producer value up front.
for (int y = 0; y < 5; y++) {
for (int x = 0; x < 5; x++) {
producer_storage[y][x] = sin(x * y);
}
}
// Pass 2: compute the consumer purely from the stored producer values.
for (int y = 0; y < 4; y++) {
for (int x = 0; x < 4; x++) {
result[y][x] = (producer_storage[y][x] +
producer_storage[y + 1][x] +
producer_storage[y][x + 1] +
producer_storage[y + 1][x + 1]) / 4;
}
}
printf("Pseudo-code for the schedule:\n");
printf("\n");
}
// ---- Section 3: producer.compute_at(consumer, y) --------------------------
{
Func producer(
"producer_y"), consumer(
"consumer_y");
producer(x, y) = sin(x * y);
consumer(x, y) = (producer(x, y) +
producer(x, y + 1) +
producer(x + 1, y) +
producer(x + 1, y + 1)) / 4;
printf("\nEvaluating producer.compute_at(consumer, y)\n");
float result[4][4];
for (int y = 0; y < 4; y++) {
// A fresh two-row scratch buffer is declared for every output row.
float producer_storage[2][5];
// Fill it with producer rows y and y + 1 (stored at rows 0 and 1). Row
// y is therefore recomputed even though the previous iteration already
// computed it as its own "y + 1" row.
for (int py = y; py < y + 2; py++) {
for (int px = 0; px < 5; px++) {
producer_storage[py - y][px] = sin(px * py);
}
}
// Consume the two buffered rows to produce output row y.
for (int x = 0; x < 4; x++) {
result[y][x] = (producer_storage[0][x] +
producer_storage[1][x] +
producer_storage[0][x + 1] +
producer_storage[1][x + 1]) / 4;
}
}
printf("Pseudo-code for the schedule:\n");
printf("\n");
}
// ---- Section 4: producer.store_root().compute_at(consumer, y) -------------
{
Func producer(
"producer_root_y"), consumer(
"consumer_root_y");
producer(x, y) = sin(x * y);
consumer(x, y) = (producer(x, y) +
producer(x, y + 1) +
producer(x + 1, y) +
producer(x + 1, y + 1)) / 4;
printf("\nEvaluating producer.store_root().compute_at(consumer, y)\n");
float result[4][4];
// Storage now lives outside the y loop, so rows persist between iterations.
float producer_storage[5][5];
for (int y = 0; y < 4; y++) {
for (int py = y; py < y + 2; py++) {
// After the first iteration, row y was already written as the previous
// iteration's row y + 1, so only the new row is computed.
if (y > 0 && py == y) continue;
for (int px = 0; px < 5; px++) {
producer_storage[py][px] = sin(px * py);
}
}
for (int x = 0; x < 4; x++) {
result[y][x] = (producer_storage[y][x] +
producer_storage[y + 1][x] +
producer_storage[y][x + 1] +
producer_storage[y + 1][x + 1]) / 4;
}
}
printf("Pseudo-code for the schedule:\n");
printf("\n");
// Variant of the loop nest above that keeps only two producer rows: the
// row index is reduced with `& 1`, so rows alternate between the two slots
// and each new row overwrites the one that is no longer needed. It writes
// into the same `result` array as the version above.
{
float producer_storage[2][5];
for (int y = 0; y < 4; y++) {
for (int py = y; py < y + 2; py++) {
if (y > 0 && py == y) continue;
for (int px = 0; px < 5; px++) {
producer_storage[py & 1][px] = sin(px * py);
}
}
for (int x = 0; x < 4; x++) {
result[y][x] = (producer_storage[y & 1][x] +
producer_storage[(y + 1) & 1][x] +
producer_storage[y & 1][x + 1] +
producer_storage[(y + 1) & 1][x + 1]) / 4;
}
}
}
}
// ---- Section 5: producer.store_root().compute_at(consumer, x) -------------
{
Func producer(
"producer_root_x"), consumer(
"consumer_root_x");
producer(x, y) = sin(x * y);
consumer(x, y) = (producer(x, y) +
producer(x, y + 1) +
producer(x + 1, y) +
producer(x + 1, y + 1)) / 4;
printf("\nEvaluating producer.store_root().compute_at(consumer, x)\n");
float result[4][4];
// Two-row buffer declared outside both loops; rows are addressed with `& 1`.
float producer_storage[2][5];
for (int y = 0; y < 4; y++) {
for (int x = 0; x < 4; x++) {
// Of the four producer values this output pixel needs, compute only the
// ones no earlier iteration has produced:
// - top-left: only at the very first pixel,
// - top-right: only along the first output row,
// - bottom-left: only at the start of each output row,
// - bottom-right: always new.
if (y == 0 && x == 0) {
producer_storage[y & 1][x] = sin(x * y);
}
if (y == 0) {
producer_storage[y & 1][x + 1] = sin((x + 1) * y);
}
if (x == 0) {
producer_storage[(y + 1) & 1][x] = sin(x * (y + 1));
}
producer_storage[(y + 1) & 1][x + 1] = sin((x + 1) * (y + 1));
result[y][x] = (producer_storage[y & 1][x] +
producer_storage[(y + 1) & 1][x] +
producer_storage[y & 1][x + 1] +
producer_storage[(y + 1) & 1][x + 1]) / 4;
}
}
printf("Pseudo-code for the schedule:\n");
printf("\n");
}
// ---- Section 6: tiled consumer, producer computed per tile ----------------
{
Func producer(
"producer_tile"), consumer(
"consumer_tile");
producer(x, y) = sin(x * y);
consumer(x, y) = (producer(x, y) +
producer(x, y + 1) +
producer(x + 1, y) +
producer(x + 1, y + 1)) / 4;
Var x_outer, y_outer, x_inner, y_inner;
// Split the consumer's x and y into 4x4 tiles.
consumer.
tile(x, y, x_outer, y_outer, x_inner, y_inner, 4, 4);
// NOTE(review): the banner below also mentions
// producer.compute_at(consumer, x_outer), but no such call is visible in
// this section -- confirm whether it was dropped.
printf("\nEvaluating:\n"
"consumer.tile(x, y, x_outer, y_outer, x_inner, y_inner, 4, 4);\n"
"producer.compute_at(consumer, x_outer);\n");
// Plain-C version: an 8x8 output processed as a 2x2 grid of 4x4 tiles.
float result[8][8];
for (int y_outer = 0; y_outer < 2; y_outer++) {
for (int x_outer = 0; x_outer < 2; x_outer++) {
// Top-left corner of the current tile in output coordinates.
int x_base = x_outer * 4;
int y_base = y_outer * 4;
// Per-tile scratch buffer: 5x5 because a 4x4 tile reads one extra
// producer column and row. It is indexed relative to the tile origin.
float producer_storage[5][5];
for (int py = y_base; py < y_base + 5; py++) {
for (int px = x_base; px < x_base + 5; px++) {
producer_storage[py - y_base][px - x_base] = sin(px * py);
}
}
// Compute the 4x4 block of consumer values for this tile.
for (int y_inner = 0; y_inner < 4; y_inner++) {
for (int x_inner = 0; x_inner < 4; x_inner++) {
int x = x_base + x_inner;
int y = y_base + y_inner;
result[y][x] =
(producer_storage[y - y_base][x - x_base] +
producer_storage[y - y_base + 1][x - x_base] +
producer_storage[y - y_base][x - x_base + 1] +
producer_storage[y - y_base + 1][x - x_base + 1]) / 4;
}
}
}
}
printf("Pseudo-code for the schedule:\n");
printf("\n");
}
// ---- Section 7: combined schedule, checked against a C reference ----------
{
Func producer(
"producer_mixed"), consumer(
"consumer_mixed");
producer(x, y) = sin(x * y);
consumer(x, y) = (producer(x, y) +
producer(x, y + 1) +
producer(x + 1, y) +
producer(x + 1, y + 1)) / 4;
// Split the consumer's y into an outer index and an inner index of extent 16.
// NOTE(review): `yo` and `yi` are not declared in the visible text, and this
// is the only scheduling call visible in this section.
consumer.
split(y, yo, yi, 16);
// Plain-C reference over a 160x160 output, processed in strips of 16 rows.
float c_result[160][160];
for (int yo = 0; yo < 160 / 16; yo++) {
int y_base = yo * 16;
// Two producer rows of 161 values each (160 output columns plus the x + 1
// neighbour), declared once per strip and addressed with `& 1`.
float producer_storage[2][161];
for (int yi = 0; yi < 16; yi++) {
int y = y_base + yi;
for (int py = y; py < y + 2; py++) {
// Within a strip, row y was already written by the previous yi
// iteration; only the first row of a strip computes both rows.
if (yi > 0 && py == y) continue;
// Producer columns are handled four at a time. 160 / 4 + 1 groups are
// needed to cover 161 columns; the last group's start is clamped to
// 161 - 4 so it stays inside the row, recomputing a few columns.
for (int x_vec = 0; x_vec < 160 / 4 + 1; x_vec++) {
int x_base = x_vec * 4;
if (x_base > 161 - 4) x_base = 161 - 4;
int x[] = {x_base, x_base + 1, x_base + 2, x_base + 3};
float vec[4] = {sinf(x[0] * py), sinf(x[1] * py),
sinf(x[2] * py), sinf(x[3] * py)};
producer_storage[py & 1][x[0]] = vec[0];
producer_storage[py & 1][x[1]] = vec[1];
producer_storage[py & 1][x[2]] = vec[2];
producer_storage[py & 1][x[3]] = vec[3];
}
}
// Consumer columns are likewise handled four at a time; each lane applies
// the same 2x2 average to the buffered producer rows.
for (int x_vec = 0; x_vec < 160 / 4; x_vec++) {
int x_base = x_vec * 4;
int x[] = {x_base, x_base + 1, x_base + 2, x_base + 3};
float vec[] = {
(producer_storage[y & 1][x[0]] +
producer_storage[(y + 1) & 1][x[0]] +
producer_storage[y & 1][x[0] + 1] +
producer_storage[(y + 1) & 1][x[0] + 1]) /
4,
(producer_storage[y & 1][x[1]] +
producer_storage[(y + 1) & 1][x[1]] +
producer_storage[y & 1][x[1] + 1] +
producer_storage[(y + 1) & 1][x[1] + 1]) /
4,
(producer_storage[y & 1][x[2]] +
producer_storage[(y + 1) & 1][x[2]] +
producer_storage[y & 1][x[2] + 1] +
producer_storage[(y + 1) & 1][x[2] + 1]) /
4,
(producer_storage[y & 1][x[3]] +
producer_storage[(y + 1) & 1][x[3]] +
producer_storage[y & 1][x[3] + 1] +
producer_storage[(y + 1) & 1][x[3] + 1]) /
4};
c_result[y][x[0]] = vec[0];
c_result[y][x[1]] = vec[1];
c_result[y][x[2]] = vec[2];
c_result[y][x[3]] = vec[3];
}
}
}
printf("Pseudo-code for the schedule:\n");
printf("\n");
// Compare every output value against the C reference with an absolute
// tolerance of 0.001; report the first mismatch and exit with -1.
// NOTE(review): `halide_result` is not declared in the visible text --
// presumably it should hold the realized 160x160 consumer output; confirm.
for (int y = 0; y < 160; y++) {
for (int x = 0; x < 160; x++) {
float error = halide_result(x, y) - c_result[y][x];
if (error < -0.001f || error > 0.001f) {
printf("halide_result(%d, %d) = %f instead of %f\n",
x, y, halide_result(x, y), c_result[y][x]);
return -1;
}
}
}
}
printf("Success!\n");
return 0;
}
A Halide::Buffer is a named shared reference to a Halide::Runtime::Buffer.
void print_loop_nest()
Write out the loop nests specified by the schedule for this Function.
Func & trace_stores()
Trace all stores to the buffer backing this Func by emitting calls to halide_trace.
Func & split(const VarOrRVar &old, const VarOrRVar &outer, const VarOrRVar &inner, const Expr &factor, TailStrategy tail=TailStrategy::Auto)
Split a dimension into inner and outer subdimensions with the given names, where the inner dimension iterates from 0 to factor-1.
Func & tile(const VarOrRVar &x, const VarOrRVar &y, const VarOrRVar &xo, const VarOrRVar &yo, const VarOrRVar &xi, const VarOrRVar &yi, const Expr &xfactor, const Expr &yfactor, TailStrategy tail=TailStrategy::Auto)
Split two dimensions at once by the given factors, and then reorder the resulting dimensions to be xi, yi, xo, yo from innermost outwards.
Func & compute_root()
Compute all of this function once ahead of time.
Func & store_at(const Func &f, const Var &var)
Allocate storage for this function within f's loop over var.
Realization realize(std::vector< int32_t > sizes={}, const Target &target=Target())
Evaluate this function over some rectangular domain and return the resulting buffer or buffers.
Func & store_root()
Equivalent to Func::store_at, but schedules storage outside the outermost loop.
Func & parallel(const VarOrRVar &var)
Mark a dimension to be traversed in parallel.
Func & vectorize(const VarOrRVar &var)
Mark a dimension to be computed all-at-once as a single vector.
Func & compute_at(const Func &f, const Var &var)
Compute this function as needed for each unique value of the given var for the given calling function f.
A Halide variable, to be used when defining functions.
This file defines the class FunctionDAG, which is our representation of a Halide pipeline, and contains methods that use Halide's bounds tools to query properties of it.