#include "Halide.h"
#include <stdio.h>
using namespace Halide;
int main(int argc, char **argv) {
{
Func producer(
"producer_default"), consumer(
"consumer_default");
producer(x, y) =
sin(x * y);
consumer(x, y) = (producer(x, y) +
producer(x, y+1) +
producer(x+1, y) +
producer(x+1, y+1))/4;
printf("\nEvaluating producer-consumer pipeline with default schedule\n");
float result[4][4];
for (int y = 0; y < 4; y++) {
for (int x = 0; x < 4; x++) {
result[y][x] = (
sin(x*y) +
}
}
printf("\n");
printf("Pseudo-code for the schedule:\n");
printf("\n");
}
{
Func producer(
"producer_root"), consumer(
"consumer_root");
producer(x, y) =
sin(x * y);
consumer(x, y) = (producer(x, y) +
producer(x, y+1) +
producer(x+1, y) +
producer(x+1, y+1))/4;
printf("\nEvaluating producer.compute_root()\n");
float result[4][4];
float producer_storage[5][5];
for (int y = 0; y < 5; y++) {
for (int x = 0; x < 5; x++) {
producer_storage[y][x] =
sin(x * y);
}
}
for (int y = 0; y < 4; y++) {
for (int x = 0; x < 4; x++) {
result[y][x] = (producer_storage[y][x] +
producer_storage[y+1][x] +
producer_storage[y][x+1] +
producer_storage[y+1][x+1])/4;
}
}
printf("Pseudo-code for the schedule:\n");
printf("\n");
}
{
Func producer(
"producer_y"), consumer(
"consumer_y");
producer(x, y) =
sin(x * y);
consumer(x, y) = (producer(x, y) +
producer(x, y+1) +
producer(x+1, y) +
producer(x+1, y+1))/4;
printf("\nEvaluating producer.compute_at(consumer, y)\n");
float result[4][4];
for (int y = 0; y < 4; y++) {
float producer_storage[2][5];
for (int py = y; py < y + 2; py++) {
for (int px = 0; px < 5; px++) {
producer_storage[py-y][px] =
sin(px * py);
}
}
for (int x = 0; x < 4; x++) {
result[y][x] = (producer_storage[0][x] +
producer_storage[1][x] +
producer_storage[0][x+1] +
producer_storage[1][x+1])/4;
}
}
printf("Pseudo-code for the schedule:\n");
printf("\n");
}
{
Func producer(
"producer_root_y"), consumer(
"consumer_root_y");
producer(x, y) =
sin(x * y);
consumer(x, y) = (producer(x, y) +
producer(x, y+1) +
producer(x+1, y) +
producer(x+1, y+1))/4;
printf("\nEvaluating producer.store_root().compute_at(consumer, y)\n");
float result[4][4];
float producer_storage[5][5];
for (int y = 0; y < 4; y++) {
for (int py = y; py < y + 2; py++) {
if (y > 0 && py == y) continue;
for (int px = 0; px < 5; px++) {
producer_storage[py][px] =
sin(px * py);
}
}
for (int x = 0; x < 4; x++) {
result[y][x] = (producer_storage[y][x] +
producer_storage[y+1][x] +
producer_storage[y][x+1] +
producer_storage[y+1][x+1])/4;
}
}
printf("Pseudo-code for the schedule:\n");
printf("\n");
{
float producer_storage[2][5];
for (int y = 0; y < 4; y++) {
for (int py = y; py < y + 2; py++) {
if (y > 0 && py == y) continue;
for (int px = 0; px < 5; px++) {
producer_storage[py & 1][px] =
sin(px * py);
}
}
for (int x = 0; x < 4; x++) {
result[y][x] = (producer_storage[y & 1][x] +
producer_storage[(y+1) & 1][x] +
producer_storage[y & 1][x+1] +
producer_storage[(y+1) & 1][x+1])/4;
}
}
}
}
{
Func producer(
"producer_root_x"), consumer(
"consumer_root_x");
producer(x, y) =
sin(x * y);
consumer(x, y) = (producer(x, y) +
producer(x, y+1) +
producer(x+1, y) +
producer(x+1, y+1))/4;
printf("\nEvaluating producer.store_root().compute_at(consumer, x)\n");
float result[4][4];
float producer_storage[2][5];
for (int y = 0; y < 4; y++) {
for (int x = 0; x < 4; x++) {
if (y == 0 && x == 0)
producer_storage[y & 1][x] =
sin(x*y);
if (y == 0)
producer_storage[y & 1][x+1] =
sin((x+1)*y);
if (x == 0)
producer_storage[(y+1) & 1][x] =
sin(x*(y+1));
producer_storage[(y+1) & 1][x+1] =
sin((x+1)*(y+1));
result[y][x] = (producer_storage[y & 1][x] +
producer_storage[(y+1) & 1][x] +
producer_storage[y & 1][x+1] +
producer_storage[(y+1) & 1][x+1])/4;
}
}
printf("Pseudo-code for the schedule:\n");
printf("\n");
}
{
Func producer(
"producer_tile"), consumer(
"consumer_tile");
producer(x, y) =
sin(x * y);
consumer(x, y) = (producer(x, y) +
producer(x, y+1) +
producer(x+1, y) +
producer(x+1, y+1))/4;
Var x_outer, y_outer, x_inner, y_inner;
consumer.
tile(x, y, x_outer, y_outer, x_inner, y_inner, 4, 4);
printf("\nEvaluating:\n"
"consumer.tile(x, y, x_outer, y_outer, x_inner, y_inner, 4, 4);\n"
"producer.compute_at(consumer, x_outer);\n");
float result[8][8];
for (int y_outer = 0; y_outer < 2; y_outer++) {
for (int x_outer = 0; x_outer < 2; x_outer++) {
int x_base = x_outer*4;
int y_base = y_outer*4;
float producer_storage[5][5];
for (int py = y_base; py < y_base + 5; py++) {
for (int px = x_base; px < x_base + 5; px++) {
producer_storage[py-y_base][px-x_base] =
sin(px * py);
}
}
for (int y_inner = 0; y_inner < 4; y_inner++) {
for (int x_inner = 0; x_inner < 4; x_inner++) {
int x = x_base + x_inner;
int y = y_base + y_inner;
result[y][x] =
(producer_storage[y - y_base][x - x_base] +
producer_storage[y - y_base + 1][x - x_base] +
producer_storage[y - y_base][x - x_base + 1] +
producer_storage[y - y_base + 1][x - x_base + 1])/4;
}
}
}
}
printf("Pseudo-code for the schedule:\n");
printf("\n");
}
{
Func producer(
"producer_mixed"), consumer(
"consumer_mixed");
producer(x, y) =
sin(x * y);
consumer(x, y) = (producer(x, y) +
producer(x, y+1) +
producer(x+1, y) +
producer(x+1, y+1))/4;
consumer.
split(y, yo, yi, 16);
float c_result[160][160];
for (int yo = 0; yo < 160/16 + 1; yo++) {
int y_base = yo * 16;
if (y_base > 160-16) y_base = 160-16;
float producer_storage[2][161];
for (int yi = 0; yi < 16; yi++) {
int y = y_base + yi;
for (int py = y; py < y+2; py++) {
if (yi > 0 && py == y) continue;
for (int x_vec = 0; x_vec < 160/4 + 1; x_vec++) {
int x_base = x_vec*4;
if (x_base > 161 - 4) x_base = 161 - 4;
int x[] = {x_base, x_base + 1, x_base + 2, x_base + 3};
float vec[4] = {sinf(x[0] * py), sinf(x[1] * py),
sinf(x[2] * py), sinf(x[3] * py)};
producer_storage[py & 1][x[0]] = vec[0];
producer_storage[py & 1][x[1]] = vec[1];
producer_storage[py & 1][x[2]] = vec[2];
producer_storage[py & 1][x[3]] = vec[3];
}
}
for (int x_vec = 0; x_vec < 160/4; x_vec++) {
int x_base = x_vec * 4;
int x[] = {x_base, x_base + 1, x_base + 2, x_base + 3};
float vec[] = {
(producer_storage[y & 1][x[0]] +
producer_storage[(y+1) & 1][x[0]] +
producer_storage[y & 1][x[0]+1] +
producer_storage[(y+1) & 1][x[0]+1])/4,
(producer_storage[y & 1][x[1]] +
producer_storage[(y+1) & 1][x[1]] +
producer_storage[y & 1][x[1]+1] +
producer_storage[(y+1) & 1][x[1]+1])/4,
(producer_storage[y & 1][x[2]] +
producer_storage[(y+1) & 1][x[2]] +
producer_storage[y & 1][x[2]+1] +
producer_storage[(y+1) & 1][x[2]+1])/4,
(producer_storage[y & 1][x[3]] +
producer_storage[(y+1) & 1][x[3]] +
producer_storage[y & 1][x[3]+1] +
producer_storage[(y+1) & 1][x[3]+1])/4};
c_result[y][x[0]] = vec[0];
c_result[y][x[1]] = vec[1];
c_result[y][x[2]] = vec[2];
c_result[y][x[3]] = vec[3];
}
}
}
printf("Pseudo-code for the schedule:\n");
printf("\n");
for (int y = 0; y < 160; y++) {
for (int x = 0; x < 160; x++) {
float error = halide_result(x, y) - c_result[y][x];
if (error < -0.001f || error > 0.001f) {
printf("halide_result(%d, %d) = %f instead of %f\n",
x, y, halide_result(x, y), c_result[y][x]);
return -1;
}
}
}
}
printf("Success!\n");
return 0;
}