#include "Halide.h"
#include <stdio.h>
// Walks through a series of producer/consumer pipelines. Every scoped section
// below defines a `producer` Func (sin(x * y)) and a `consumer` Func (the mean
// of a 2x2 neighbourhood of producer), prints a banner naming an evaluation
// strategy, and then spells out a hand-written loop nest for that strategy.
//
// Returns 0 after printing "Success!", or -1 as soon as the last section finds
// an element of halide_result that differs from c_result by more than 0.001.
//
// NOTE(review): x, y, yo, yi (as Halide-side names) and halide_result are used
// but not declared anywhere in the visible code, and most of the schedule
// calls named in the banners (compute_root / compute_at / store_root) are not
// visible either — confirm whether they were dropped and need restoring.
int main(int argc, char **argv) {
    // ---- Section 1: banner says "default schedule" --------------------------
    {
        Func producer("producer_default"), consumer("consumer_default");
        producer(x, y) = sin(x * y);
        consumer(x, y) = (producer(x, y) +
                          producer(x, y + 1) +
                          producer(x + 1, y) +
                          producer(x + 1, y + 1)) / 4;

        printf("\nEvaluating producer-consumer pipeline with default schedule\n");

        // Loop version with the producer expression substituted straight into
        // the consumer: no intermediate buffer, and each output evaluates all
        // four sin terms itself. The four terms mirror the four producer
        // samples in the consumer definition above, hence the division by 4.
        float result[4][4];
        for (int y = 0; y < 4; y++) {
            for (int x = 0; x < 4; x++) {
                result[y][x] = (sin(x * y) +
                                sin(x * (y + 1)) +
                                sin((x + 1) * y) +
                                sin((x + 1) * (y + 1))) / 4;
            }
        }
        printf("\n");
        printf("Pseudo-code for the schedule:\n");
        printf("\n");
    }

    // ---- Section 2: banner says "producer.compute_root()" -------------------
    {
        Func producer("producer_root"), consumer("consumer_root");
        producer(x, y) = sin(x * y);
        consumer(x, y) = (producer(x, y) +
                          producer(x, y + 1) +
                          producer(x + 1, y) +
                          producer(x + 1, y + 1)) / 4;

        printf("\nEvaluating producer.compute_root()\n");

        float result[4][4];

        // First pass: fill the whole producer buffer. It is 5x5 because the
        // 4x4 consumer reads one extra row and one extra column (x+1, y+1).
        float producer_storage[5][5];
        for (int y = 0; y < 5; y++) {
            for (int x = 0; x < 5; x++) {
                producer_storage[y][x] = sin(x * y);
            }
        }

        // Second pass: the consumer only reads from the finished buffer.
        for (int y = 0; y < 4; y++) {
            for (int x = 0; x < 4; x++) {
                result[y][x] = (producer_storage[y][x] +
                                producer_storage[y + 1][x] +
                                producer_storage[y][x + 1] +
                                producer_storage[y + 1][x + 1]) / 4;
            }
        }
        printf("Pseudo-code for the schedule:\n");
        printf("\n");
    }

    // ---- Section 3: banner says "producer.compute_at(consumer, y)" ----------
    {
        Func producer("producer_y"), consumer("consumer_y");
        producer(x, y) = sin(x * y);
        consumer(x, y) = (producer(x, y) +
                          producer(x, y + 1) +
                          producer(x + 1, y) +
                          producer(x + 1, y + 1)) / 4;

        printf("\nEvaluating producer.compute_at(consumer, y)\n");

        float result[4][4];
        for (int y = 0; y < 4; y++) {
            // A fresh two-row scratch buffer per output row: rows y and y+1 of
            // the producer are (re)computed on every iteration of y.
            float producer_storage[2][5];
            for (int py = y; py < y + 2; py++) {
                for (int px = 0; px < 5; px++) {
                    // py - y maps the absolute row to slot 0 or 1.
                    producer_storage[py - y][px] = sin(px * py);
                }
            }

            // Consume the two rows just produced.
            for (int x = 0; x < 4; x++) {
                result[y][x] = (producer_storage[0][x] +
                                producer_storage[1][x] +
                                producer_storage[0][x + 1] +
                                producer_storage[1][x + 1]) / 4;
            }
        }
        printf("Pseudo-code for the schedule:\n");
        printf("\n");
    }

    // ---- Section 4: banner says "producer.store_root().compute_at(consumer, y)"
    {
        Func producer("producer_root_y"), consumer("consumer_root_y");
        producer(x, y) = sin(x * y);
        consumer(x, y) = (producer(x, y) +
                          producer(x, y + 1) +
                          producer(x + 1, y) +
                          producer(x + 1, y + 1)) / 4;

        printf("\nEvaluating producer.store_root().compute_at(consumer, y)\n");

        float result[4][4];

        // The buffer now lives outside the y loop, so rows written in one
        // iteration are still available in the next.
        float producer_storage[5][5];
        for (int y = 0; y < 4; y++) {
            for (int py = y; py < y + 2; py++) {
                // For y > 0, row y was already written as row y+1 of the
                // previous iteration, so only the new row is computed.
                if (y > 0 && py == y) continue;
                for (int px = 0; px < 5; px++) {
                    producer_storage[py][px] = sin(px * py);
                }
            }

            for (int x = 0; x < 4; x++) {
                result[y][x] = (producer_storage[y][x] +
                                producer_storage[y + 1][x] +
                                producer_storage[y][x + 1] +
                                producer_storage[y + 1][x + 1]) / 4;
            }
        }
        printf("Pseudo-code for the schedule:\n");
        printf("\n");

        // Same traversal again, but with only two rows of storage: row index
        // `& 1` alternates between slot 0 and slot 1, so the row being written
        // replaces the row that is no longer needed. Writes into the `result`
        // array declared above in this section.
        {
            float producer_storage[2][5];
            for (int y = 0; y < 4; y++) {
                for (int py = y; py < y + 2; py++) {
                    if (y > 0 && py == y) continue;
                    for (int px = 0; px < 5; px++) {
                        producer_storage[py & 1][px] = sin(px * py);
                    }
                }

                for (int x = 0; x < 4; x++) {
                    result[y][x] = (producer_storage[y & 1][x] +
                                    producer_storage[(y + 1) & 1][x] +
                                    producer_storage[y & 1][x + 1] +
                                    producer_storage[(y + 1) & 1][x + 1]) / 4;
                }
            }
        }
    }

    // ---- Section 5: banner says "producer.store_root().compute_at(consumer, x)"
    {
        Func producer("producer_root_x"), consumer("consumer_root_x");
        producer(x, y) = sin(x * y);
        consumer(x, y) = (producer(x, y) +
                          producer(x, y + 1) +
                          producer(x + 1, y) +
                          producer(x + 1, y + 1)) / 4;

        printf("\nEvaluating producer.store_root().compute_at(consumer, x)\n");

        float result[4][4];

        // Two-row buffer shared across the whole loop nest; rows are addressed
        // with `& 1` as in the previous section.
        float producer_storage[2][5];
        for (int y = 0; y < 4; y++) {
            for (int x = 0; x < 4; x++) {
                // Of the 2x2 neighbourhood needed for this output, compute only
                // the samples not already written by an earlier iteration.

                // Top-left: only missing for the very first output.
                if (y == 0 && x == 0) {
                    producer_storage[y & 1][x] = sin(x * y);
                }

                // Top-right: only missing along the first row.
                if (y == 0) {
                    producer_storage[y & 1][x + 1] = sin((x + 1) * y);
                }

                // Bottom-left: only missing at the start of each row.
                if (x == 0) {
                    producer_storage[(y + 1) & 1][x] = sin(x * (y + 1));
                }

                // Bottom-right: always new.
                producer_storage[(y + 1) & 1][x + 1] = sin((x + 1) * (y + 1));

                result[y][x] = (producer_storage[y & 1][x] +
                                producer_storage[(y + 1) & 1][x] +
                                producer_storage[y & 1][x + 1] +
                                producer_storage[(y + 1) & 1][x + 1]) / 4;
            }
        }
        printf("Pseudo-code for the schedule:\n");
        printf("\n");
    }

    // ---- Section 6: consumer tiled 4x4, producer per tile -------------------
    {
        Func producer("producer_tile"), consumer("consumer_tile");
        producer(x, y) = sin(x * y);
        consumer(x, y) = (producer(x, y) +
                          producer(x, y + 1) +
                          producer(x + 1, y) +
                          producer(x + 1, y + 1)) / 4;

        Var x_outer, y_outer, x_inner, y_inner;
        consumer.tile(x, y, x_outer, y_outer, x_inner, y_inner, 4, 4);
        // NOTE(review): the banner below also names
        // producer.compute_at(consumer, x_outer), but no such call is visible
        // here — confirm.

        printf("\nEvaluating:\n"
               "consumer.tile(x, y, x_outer, y_outer, x_inner, y_inner, 4, 4);\n"
               "producer.compute_at(consumer, x_outer);\n");

        // Loop version: an 8x8 output processed as a 2x2 grid of 4x4 tiles.
        float result[8][8];
        for (int y_outer = 0; y_outer < 2; y_outer++) {
            for (int x_outer = 0; x_outer < 2; x_outer++) {
                // Top-left corner of the current tile.
                int x_base = x_outer * 4;
                int y_base = y_outer * 4;

                // Per-tile producer buffer: 5x5 because a 4x4 tile of outputs
                // reads one extra row and column.
                float producer_storage[5][5];
                for (int py = y_base; py < y_base + 5; py++) {
                    for (int px = x_base; px < x_base + 5; px++) {
                        producer_storage[py - y_base][px - x_base] = sin(px * py);
                    }
                }

                // Compute the tile's outputs from the tile-local buffer.
                for (int y_inner = 0; y_inner < 4; y_inner++) {
                    for (int x_inner = 0; x_inner < 4; x_inner++) {
                        int x = x_base + x_inner;
                        int y = y_base + y_inner;
                        result[y][x] =
                            (producer_storage[y - y_base][x - x_base] +
                             producer_storage[y - y_base + 1][x - x_base] +
                             producer_storage[y - y_base][x - x_base + 1] +
                             producer_storage[y - y_base + 1][x - x_base + 1]) / 4;
                    }
                }
            }
        }
        printf("Pseudo-code for the schedule:\n");
        printf("\n");
    }

    // ---- Section 7: strips of 16 rows, 4-wide groups in x -------------------
    {
        Func producer("producer_mixed"), consumer("consumer_mixed");
        producer(x, y) = sin(x * y);
        consumer(x, y) = (producer(x, y) +
                          producer(x, y + 1) +
                          producer(x + 1, y) +
                          producer(x + 1, y + 1)) / 4;

        consumer.split(y, yo, yi, 16);

        // Loop version producing a 160x160 output.
        float c_result[160][160];

        // Outer loop over strips of 16 output rows.
        for (int yo = 0; yo < 160 / 16; yo++) {
            int y_base = yo * 16;

            // Two-row producer buffer per strip, 161 columns wide (160 outputs
            // plus the x+1 neighbour), addressed with `& 1` on the row.
            float producer_storage[2][161];

            for (int yi = 0; yi < 16; yi++) {
                int y = y_base + yi;

                for (int py = y; py < y + 2; py++) {
                    // Within a strip, row y was already produced as row y+1 of
                    // the previous yi; only the first row of a strip needs both.
                    if (yi > 0 && py == y) continue;

                    // Produce the row in groups of four columns. 161 is not a
                    // multiple of 4, so 41 groups are issued and the last one
                    // is shifted back to start at 157; columns 157..159 are
                    // simply written twice with the same values.
                    for (int x_vec = 0; x_vec < 160 / 4 + 1; x_vec++) {
                        int x_base = x_vec * 4;
                        if (x_base > 161 - 4) x_base = 161 - 4;

                        int x[] = {x_base, x_base + 1, x_base + 2, x_base + 3};
                        float vec[4] = {sinf(x[0] * py), sinf(x[1] * py),
                                        sinf(x[2] * py), sinf(x[3] * py)};
                        producer_storage[py & 1][x[0]] = vec[0];
                        producer_storage[py & 1][x[1]] = vec[1];
                        producer_storage[py & 1][x[2]] = vec[2];
                        producer_storage[py & 1][x[3]] = vec[3];
                    }
                }

                // Consume the row, again four columns at a time. 160 divides
                // evenly by 4, so no tail handling is needed here.
                for (int x_vec = 0; x_vec < 160 / 4; x_vec++) {
                    int x_base = x_vec * 4;
                    int x[] = {x_base, x_base + 1, x_base + 2, x_base + 3};
                    float vec[] = {
                        (producer_storage[y & 1][x[0]] +
                         producer_storage[(y + 1) & 1][x[0]] +
                         producer_storage[y & 1][x[0] + 1] +
                         producer_storage[(y + 1) & 1][x[0] + 1]) /
                            4,
                        (producer_storage[y & 1][x[1]] +
                         producer_storage[(y + 1) & 1][x[1]] +
                         producer_storage[y & 1][x[1] + 1] +
                         producer_storage[(y + 1) & 1][x[1] + 1]) /
                            4,
                        (producer_storage[y & 1][x[2]] +
                         producer_storage[(y + 1) & 1][x[2]] +
                         producer_storage[y & 1][x[2] + 1] +
                         producer_storage[(y + 1) & 1][x[2] + 1]) /
                            4,
                        (producer_storage[y & 1][x[3]] +
                         producer_storage[(y + 1) & 1][x[3]] +
                         producer_storage[y & 1][x[3] + 1] +
                         producer_storage[(y + 1) & 1][x[3] + 1]) /
                            4};

                    c_result[y][x[0]] = vec[0];
                    c_result[y][x[1]] = vec[1];
                    c_result[y][x[2]] = vec[2];
                    c_result[y][x[3]] = vec[3];
                }
            }
        }
        printf("Pseudo-code for the schedule:\n");
        printf("\n");

        // Compare the hand-written result against halide_result element by
        // element, allowing an absolute difference of up to 0.001.
        // NOTE(review): halide_result is not defined in the visible code.
        for (int y = 0; y < 160; y++) {
            for (int x = 0; x < 160; x++) {
                float error = halide_result(x, y) - c_result[y][x];
                if (error < -0.001f || error > 0.001f) {
                    printf("halide_result(%d, %d) = %f instead of %f\n",
                           x, y, halide_result(x, y), c_result[y][x]);
                    return -1;
                }
            }
        }
    }

    printf("Success!\n");
    return 0;
}